devpost-api 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devpost_api/__init__.py +56 -0
- devpost_api/_meta.py +9 -0
- devpost_api/cli.py +371 -0
- devpost_api/client.py +604 -0
- devpost_api/exceptions.py +50 -0
- devpost_api/models.py +203 -0
- devpost_api/parsing.py +404 -0
- devpost_api/py.typed +1 -0
- devpost_api-1.0.1.dist-info/METADATA +163 -0
- devpost_api-1.0.1.dist-info/RECORD +13 -0
- devpost_api-1.0.1.dist-info/WHEEL +4 -0
- devpost_api-1.0.1.dist-info/entry_points.txt +2 -0
- devpost_api-1.0.1.dist-info/licenses/LICENSE +21 -0
devpost_api/models.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Typed models used by the Devpost API wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class SoftwareSummary:
    """Summary record for one Devpost software project, built from a JSON object.

    The four leading fields are always populated; the optional fields are
    ``None`` when the payload omits them or supplies a value of another type.
    """

    id: int
    name: str
    slug: str
    url: str
    tagline: str | None = None
    description: str | None = None
    publicly_visible: bool | None = None
    thumbnail: str | None = None

    @classmethod
    def from_json(cls, payload: dict[str, Any]) -> SoftwareSummary:
        """Create a summary from a decoded JSON object.

        A key that is present with a JSON ``null`` value is handled exactly
        like a missing key: ``id`` falls back to ``0`` and the required text
        fields fall back to ``""``.  An ``id`` that cannot be converted to an
        integer also yields ``0`` instead of raising.

        Args:
            payload: Mapping that may contain ``id``, ``name``, ``slug``,
                ``url``, ``tagline``, ``description``, ``publicly_visible``
                and ``thumbnail``; every key is optional.

        Returns:
            The populated ``SoftwareSummary``.
        """

        def required_text(key: str) -> str:
            # str(None) would yield the literal text "None", so null is mapped
            # to the same empty default a missing key gets.
            value = payload.get(key)
            return "" if value is None else str(value)

        return cls(
            # _maybe_int returns None for null/unparseable values; 0 is the
            # default already used for a missing id.
            id=_maybe_int(payload.get("id")) or 0,
            name=required_text("name"),
            slug=required_text("slug"),
            url=required_text("url"),
            tagline=_maybe_str(payload.get("tagline")),
            description=_maybe_str(payload.get("description")),
            publicly_visible=_maybe_bool(payload.get("publicly_visible")),
            thumbnail=_maybe_str(payload.get("thumbnail")),
        )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class UserSummary:
    """Summary record for one Devpost user, built from a JSON object."""

    screen_name: str
    full_name: str | None = None
    photo: str | None = None

    @classmethod
    def from_json(cls, payload: dict[str, Any]) -> UserSummary:
        """Create a summary from a decoded JSON object.

        A ``screen_name`` that is missing or JSON ``null`` becomes ``""``
        (``str(None)`` would otherwise store the text ``"None"``).
        ``full_name`` and ``photo`` are kept only when they are strings.

        Args:
            payload: Mapping that may contain ``screen_name``, ``full_name``
                and ``photo``.

        Returns:
            The populated ``UserSummary``.
        """
        raw_screen_name = payload.get("screen_name")
        return cls(
            screen_name="" if raw_screen_name is None else str(raw_screen_name),
            full_name=_maybe_str(payload.get("full_name")),
            photo=_maybe_str(payload.get("photo")),
        )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class HackathonSummary:
    """Summary record for one hackathon, built from a JSON object."""

    id: int
    title: str
    open_state: str | None = None
    url: str | None = None
    location: str | None = None
    thumbnail_url: str | None = None
    submission_period_dates: str | None = None
    registrations_count: int | None = None
    prize_amount: str | None = None
    themes: list[str] = field(default_factory=list)

    @classmethod
    def from_json(cls, payload: dict[str, Any]) -> HackathonSummary:
        """Create a summary from a decoded JSON object.

        ``id`` and ``title`` treat a JSON ``null`` like a missing key
        (``0`` / ``""``); an ``id`` that cannot be converted to an integer
        also yields ``0`` instead of raising.  ``location`` is read from the
        nested ``displayed_location.location`` entry and ``themes`` from the
        ``name`` of each theme object.

        Args:
            payload: Mapping describing the hackathon; every key is optional.

        Returns:
            The populated ``HackathonSummary``.
        """
        # The location lives one level down; anything that is not a
        # dict holding a string leaves the field unset.
        location: str | None = None
        displayed = payload.get("displayed_location")
        if isinstance(displayed, dict):
            location = _maybe_str(displayed.get("location"))

        raw_title = payload.get("title")
        return cls(
            # None (null / unparseable) falls back to the missing-key default.
            id=_maybe_int(payload.get("id")) or 0,
            # Avoid str(None) == "None" for a null title.
            title="" if raw_title is None else str(raw_title),
            open_state=_maybe_str(payload.get("open_state")),
            url=_maybe_str(payload.get("url")),
            location=location,
            thumbnail_url=_maybe_str(payload.get("thumbnail_url")),
            submission_period_dates=_maybe_str(payload.get("submission_period_dates")),
            registrations_count=_maybe_int(payload.get("registrations_count")),
            prize_amount=_maybe_str(payload.get("prize_amount")),
            themes=_parse_theme_names(payload.get("themes")),
        )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class HackathonProjectsPage:
    """One page of a hackathon's project gallery."""

    # Base URL of the hackathon the gallery belongs to.
    hackathon_url: str
    # Page number this record describes.
    page: int
    # Number of gallery pages reported for the hackathon.
    total_pages: int
    # Project URLs found on this page; each instance gets its own list.
    project_urls: list[str] = field(default_factory=list)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass
class ExternalLink:
    """A hyperlink attached to a project."""

    # Absolute target of the link.
    url: str
    # Visible link text, when there is any.
    label: str | None = None
    # Host part of ``url``, when known.
    domain: str | None = None
    # True when the link was classified as pointing at GitHub.
    is_github: bool = False
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class CreatorProfile:
    """A person credited on a project."""

    # Display name of the creator.
    name: str
    # Link to the creator's profile page, when known.
    profile_url: str | None = None
    # URL of the creator's picture, when known.
    photo: str | None = None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass
class GitHubRepo:
    """Repository metadata built from a GitHub-style JSON object."""

    owner: str
    name: str
    full_name: str
    html_url: str
    description: str | None = None
    stargazers_count: int | None = None
    forks_count: int | None = None
    open_issues_count: int | None = None
    language: str | None = None
    license_name: str | None = None
    default_branch: str | None = None
    updated_at: str | None = None

    @classmethod
    def from_json(cls, payload: dict[str, Any]) -> GitHubRepo:
        """Create a repository record from a decoded JSON object.

        ``full_name``, ``name`` and ``html_url`` treat a JSON ``null`` like a
        missing key and become ``""`` (``str(None)`` would otherwise store the
        text ``"None"``).  The owner is taken from ``owner.login`` and, when
        that is unavailable, from the part of ``full_name`` before the first
        ``/``.  The licence name is read from ``license.name``.

        Args:
            payload: Mapping describing the repository; every key is optional.

        Returns:
            The populated ``GitHubRepo``.
        """

        def required_text(key: str) -> str:
            # Map null to the same empty default a missing key gets.
            value = payload.get(key)
            return "" if value is None else str(value)

        full_name = required_text("full_name")

        owner_name = ""
        owner_payload = payload.get("owner")
        if isinstance(owner_payload, dict):
            owner_name = _maybe_str(owner_payload.get("login")) or ""
        if not owner_name and "/" in full_name:
            # Fall back to the "<owner>/<repo>" form of full_name.
            owner_name = full_name.split("/", 1)[0]

        license_payload = payload.get("license")
        license_name = (
            _maybe_str(license_payload.get("name"))
            if isinstance(license_payload, dict)
            else None
        )

        return cls(
            owner=owner_name,
            name=required_text("name"),
            full_name=full_name,
            html_url=required_text("html_url"),
            description=_maybe_str(payload.get("description")),
            stargazers_count=_maybe_int(payload.get("stargazers_count")),
            forks_count=_maybe_int(payload.get("forks_count")),
            open_issues_count=_maybe_int(payload.get("open_issues_count")),
            language=_maybe_str(payload.get("language")),
            license_name=license_name,
            default_branch=_maybe_str(payload.get("default_branch")),
            updated_at=_maybe_str(payload.get("updated_at")),
        )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass
class ProjectDetails:
    """Everything collected about a single project page."""

    # URL the details were read from.
    source_url: str
    # Project slug.
    slug: str
    # Project title.
    name: str
    # Numeric software id, when one was found.
    software_id: int | None = None
    tagline: str | None = None
    description: str | None = None
    # Main image of the project, when one was found.
    hero_image: str | None = None

    # Every list below defaults to a fresh empty list per instance.
    image_urls: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    submitted_to: list[str] = field(default_factory=list)
    # Creator names; ``creator_profiles`` carries the richer records.
    creators: list[str] = field(default_factory=list)
    creator_profiles: list[CreatorProfile] = field(default_factory=list)
    links: list[ExternalLink] = field(default_factory=list)
    github_urls: list[str] = field(default_factory=list)
    github_repos: list[GitHubRepo] = field(default_factory=list)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _maybe_str(value: Any) -> str | None:
    """Return *value* unchanged when it is a ``str``; otherwise ``None``."""
    if isinstance(value, str):
        return value
    return None
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _maybe_bool(value: Any) -> bool | None:
    """Return *value* unchanged when it is a ``bool``; otherwise ``None``."""
    if isinstance(value, bool):
        return value
    return None
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _maybe_int(value: Any) -> int | None:
    """Convert *value* to ``int``, returning ``None`` when that is impossible.

    ``None`` stays ``None``.  Values that ``int()`` rejects (wrong type,
    malformed text, NaN) and non-finite floats, for which ``int()`` raises
    ``OverflowError``, also give ``None`` rather than propagating the error.
    """
    if value is None:
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")); lenient JSON decoders can
        # produce such floats from "Infinity".
        return None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _parse_theme_names(value: Any) -> list[str]:
    """Collect the distinct, non-empty ``name`` strings from a list of theme dicts.

    Anything that is not a list gives ``[]``.  List items that are not dicts,
    and names that are missing, empty or not strings, are ignored.  The first
    occurrence of each name determines its position in the result.
    """
    if not isinstance(value, list):
        return []

    raw_names = (entry.get("name") for entry in value if isinstance(entry, dict))
    # dict keys keep insertion order, which gives first-seen de-duplication.
    return list(
        dict.fromkeys(label for label in raw_names if isinstance(label, str) and label)
    )
|
devpost_api/parsing.py
ADDED
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
"""HTML and URL parsing helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
from html.parser import HTMLParser
|
|
8
|
+
from urllib.parse import ParseResult, urljoin, urlparse, urlunparse
|
|
9
|
+
|
|
10
|
+
from .exceptions import GitHubResolutionError
|
|
11
|
+
from .models import CreatorProfile, ExternalLink, HackathonProjectsPage, ProjectDetails
|
|
12
|
+
|
|
13
|
+
# Base under which every project page lives; a slug is appended to it.
DEVPOST_PROJECT_PREFIX = "https://devpost.com/software/"
# Bare host name used when building and recognising Devpost URLs.
DEVPOST_DOMAIN = "devpost.com"

# href attribute pointing at https?://github.com/<segment>/<rest>.
_GITHUB_HREF_RE = re.compile(r"""href=["'](https?://github\.com/[^/\s"'<>]+/[^"\s'<>]+)["']""")
# "software_id=<digits>" anywhere in the markup.
_SOFTWARE_ID_RE = re.compile(r"software_id=(\d+)")
# Any quoted href attribute value.
_HREF_RE = re.compile(r"""href=["']([^"'<>]+)["']""")
# Gallery pagination reference "project-gallery?page=<digits>".
_GALLERY_PAGE_RE = re.compile(r"project-gallery\?page=(\d+)")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def to_project_url(slug_or_url: str) -> str:
    """Normalize a project slug/path/url to an absolute Devpost project URL.

    Args:
        slug_or_url: A bare slug (``"my-app"``), a site path
            (``"/software/my-app"``) or an absolute http(s) URL.

    Returns:
        Absolute http(s) URLs are returned as given (whitespace-trimmed).
        Anything else has surrounding slashes and a leading ``software/``
        removed and is appended to ``DEVPOST_PROJECT_PREFIX``.

    Raises:
        ValueError: If the input is empty, or contains nothing but
            whitespace and slashes, so that no slug remains.
    """
    candidate = slug_or_url.strip()
    if not candidate:
        raise ValueError("slug_or_url cannot be empty")

    parts = urlparse(candidate)
    if parts.scheme in {"http", "https"} and parts.netloc:
        return candidate

    slug = candidate.strip("/").removeprefix("software/")
    if not slug:
        # Input such as "/" would otherwise produce the bare prefix
        # ("https://devpost.com/software/"), which names no project.
        raise ValueError("slug_or_url cannot be empty")
    return f"{DEVPOST_PROJECT_PREFIX}{slug}"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def to_hackathon_url(slug_or_url: str) -> str:
    """Normalize a hackathon slug/domain/url to an absolute hackathon base URL.

    Args:
        slug_or_url: A hackathon slug (``"myhack"``), a host name
            (``"myhack.devpost.com"``), or a URL with or without scheme.
            Any path, query or fragment is discarded.

    Returns:
        ``"<scheme>://<host>"`` for absolute http(s) URLs, otherwise
        ``"https://<host>"``.  A host without a dot is treated as a slug and
        gets ``.<DEVPOST_DOMAIN>`` appended.

    Raises:
        ValueError: If the input is empty or no host can be determined.
    """
    value = slug_or_url.strip().strip("/")
    if not value:
        raise ValueError("slug_or_url cannot be empty")

    parsed = urlparse(value)
    if parsed.scheme in {"http", "https"} and parsed.netloc:
        return f"{parsed.scheme}://{parsed.netloc}"

    # Scheme-less input: prefix a scheme so urlparse can separate the host
    # from a trailing path or query ("myhack.devpost.com/project-gallery").
    # Otherwise the path would end up inside the returned "base" URL.
    host = urlparse(f"https://{value}").netloc
    if not host:
        raise ValueError(f"cannot determine hackathon host from {slug_or_url!r}")
    if "." not in host:
        host = f"{host}.{DEVPOST_DOMAIN}"
    return f"https://{host}"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def hackathon_gallery_url(hackathon_slug_or_url: str, page: int = 1) -> str:
    """Return the project-gallery URL of a hackathon for the given page.

    Pages ``<= 1`` map to the plain gallery URL; later pages carry a
    ``?page=N`` query string.
    """
    root = to_hackathon_url(hackathon_slug_or_url).rstrip("/")
    gallery = f"{root}/project-gallery"
    if page > 1:
        return f"{gallery}?page={page}"
    return gallery
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def parse_hackathon_gallery_page(hackathon_url: str, page: int, html: str) -> HackathonProjectsPage:
    """Turn the HTML of one gallery page into a ``HackathonProjectsPage``.

    ``total_pages`` is the largest page number referenced by a
    ``project-gallery?page=N`` link in *html*; when no such link exists it is
    ``page``, but never less than 1.
    """
    found_urls = _extract_project_urls_from_gallery_html(html)

    linked_pages = [int(hit.group(1)) for hit in _GALLERY_PAGE_RE.finditer(html)]
    if linked_pages:
        page_count = max(linked_pages)
    else:
        page_count = max(1, page)

    return HackathonProjectsPage(
        hackathon_url=to_hackathon_url(hackathon_url),
        page=page,
        total_pages=page_count,
        project_urls=found_urls,
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def slug_from_project_url(url: str) -> str:
    """Return the path of *url* without surrounding slashes and without a leading ``software/``."""
    trimmed_path = urlparse(url).path.strip("/")
    return trimmed_path.removeprefix("software/")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def github_owner_repo(url_or_owner_repo: str) -> tuple[str, str]:
    """Return (owner, repo) from a GitHub URL or owner/repo string.

    Args:
        url_or_owner_repo: Either ``"owner/repo"`` or a URL whose host is
            ``github.com`` or one of its subdomains.  A trailing ``.git`` on
            the repository name is dropped; extra path segments are ignored.

    Returns:
        The ``(owner, repo)`` pair.

    Raises:
        GitHubResolutionError: If the input is empty or no owner/repo pair
            can be derived from it.
    """
    value = url_or_owner_repo.strip()
    if not value:
        raise GitHubResolutionError("Empty GitHub identifier")

    # Short form: exactly one slash and no scheme.
    if value.count("/") == 1 and "://" not in value:
        owner, repo = value.split("/", 1)
        repo = repo.removesuffix(".git")
        if owner and repo:
            return owner, repo

    parsed = urlparse(value)
    host = parsed.netloc.lower()
    # Compare whole host labels: a plain endswith("github.com") would also
    # accept unrelated hosts such as "notgithub.com".
    if host == "github.com" or host.endswith(".github.com"):
        segments = [piece for piece in parsed.path.split("/") if piece]
        if len(segments) >= 2:
            owner = segments[0]
            repo = segments[1].removesuffix(".git")
            if owner and repo:
                return owner, repo

    raise GitHubResolutionError(f"Cannot resolve GitHub owner/repo from: {value}")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def parse_project_details(url: str, html: str) -> ProjectDetails:
    """Build a ``ProjectDetails`` from the HTML of a project page.

    The markup is run through ``_ProjectParser``.  The software id comes from
    the first ``software_id=<digits>`` occurrence in *html*.  GitHub URLs found
    by the parser are listed first, followed by those matched by the raw href
    regex, with duplicates removed.
    """
    extractor = _ProjectParser(url)
    extractor.feed(html)
    extractor.close()

    id_hit = _SOFTWARE_ID_RE.search(html)
    found_id: int | None = int(id_hit.group(1)) if id_hit else None

    all_github = _unique([*extractor.github_urls, *_extract_github_urls(html)])

    return ProjectDetails(
        source_url=url,
        slug=slug_from_project_url(url),
        # Fall back to the og:title when the page had no usable <h1>.
        name=extractor.name or extractor.og_title or "",
        software_id=found_id,
        tagline=extractor.tagline,
        description=extractor.description,
        hero_image=extractor.hero_image,
        image_urls=extractor.image_urls,
        tags=extractor.tags,
        submitted_to=extractor.submitted_to,
        creators=extractor.creators,
        creator_profiles=extractor.creator_profiles,
        links=extractor.links,
        github_urls=all_github,
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _extract_github_urls(html: str) -> list[str]:
    """Return the distinct github.com href targets in *html*, in order of appearance.

    Trailing ``)``, ``.``, ``,`` and ``;`` characters are removed from each match.
    """
    trimmed = [hit.group(1).rstrip(").,;") for hit in _GITHUB_HREF_RE.finditer(html)]
    return list(_unique(trimmed))
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _extract_project_urls_from_gallery_html(html: str) -> list[str]:
    """Return the distinct canonical project URLs linked from gallery *html*.

    Every href value is whitespace-trimmed, has trailing ``).,;`` characters
    removed, and is kept only if ``_normalize_devpost_software_href``
    accepts it.
    """
    raw_hrefs = (hit.group(1).strip().rstrip(").,;") for hit in _HREF_RE.finditer(html))
    canonical = (_normalize_devpost_software_href(raw) for raw in raw_hrefs if raw)
    return _unique(link for link in canonical if link)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _normalize_devpost_software_href(href: str) -> str | None:
    """Return the canonical project URL for *href*, or ``None`` if it is not one.

    Accepted forms are protocol-relative (``//host/...``), site-relative
    (``/software/...``, resolved against ``https://devpost.com``) and absolute
    http(s) URLs.  The host must be ``DEVPOST_DOMAIN``, optionally with a
    leading ``www.``, and the path must start with ``/software/`` followed by
    a non-empty slug.  The result is always
    ``https://<DEVPOST_DOMAIN>/software/<slug>`` with sub-paths, query and
    fragment dropped.
    """
    if href.startswith("//"):
        candidate = f"https:{href}"
    elif href.startswith("/"):
        candidate = urljoin("https://devpost.com", href)
    elif href.startswith(("http://", "https://")):
        candidate = href
    else:
        return None

    parsed = urlparse(candidate)
    # Only a leading "www." is insignificant.  Removing the substring anywhere
    # (str.replace) would let a host like "dewww.vpost.com" pass as devpost.com.
    host = parsed.netloc.lower().removeprefix("www.")
    if host != DEVPOST_DOMAIN:
        return None
    if not parsed.path.startswith("/software/"):
        return None

    slug = parsed.path.removeprefix("/software/").strip("/")
    if not slug:
        return None
    # Keep only the first segment so "/software/x/edit" maps to "/software/x".
    clean_path = f"/software/{slug.split('/', 1)[0]}"
    return urlunparse(("https", DEVPOST_DOMAIN, clean_path, "", "", ""))
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _unique(values: Iterable[str]) -> list[str]:
    """Return the items of *values* as a list with duplicates dropped, keeping first-seen order."""
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(values))
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class _ProjectParser(HTMLParser):
    """Incremental HTML parser that collects project fields from a project page.

    Feed the markup with ``feed()`` / ``close()`` and read the public
    attributes afterwards.  Four page regions are tracked with nesting
    counters (``0`` means "outside the region"):

    * ``<nav>`` whose class contains ``app-links`` -> ``links``, ``github_urls``
    * element with ``id="built-with"``  -> ``tags``
    * element with ``id="submissions"`` -> ``submitted_to``
    * element with ``id="app-team"``    -> ``creators``, ``creator_profiles``

    A counter is set to 1 on the region's start tag, raised on every nested
    start tag and lowered on every end tag.  Void elements are excluded from
    this bookkeeping because they never produce a matching end tag.  Markup
    that omits optional end tags (for example an unclosed ``<li>``) can still
    leave a counter above zero.
    """

    # Elements that have no end tag.  Counting their start tag would raise a
    # depth counter permanently, so the surrounding region would never close
    # and later, unrelated anchors would be attributed to it.
    _VOID_TAGS = frozenset(
        {
            "area", "base", "br", "col", "embed", "hr", "img",
            "input", "link", "meta", "param", "source", "track", "wbr",
        }
    )

    def __init__(self, source_url: str) -> None:
        """Prepare an empty result set; *source_url* is the base for relative URLs."""
        super().__init__(convert_charrefs=True)
        self.source_url = source_url
        self.name = ""
        self.tagline: str | None = None
        self.description: str | None = None
        self.og_title = ""
        self.hero_image: str | None = None
        self.image_urls: list[str] = []
        self.tags: list[str] = []
        self.submitted_to: list[str] = []
        self.creators: list[str] = []
        self.creator_profiles: list[CreatorProfile] = []
        self.links: list[ExternalLink] = []
        self.github_urls: list[str] = []

        # State of the element currently being read.
        self._in_h1 = False
        self._in_h3 = False
        self._active_link: dict[str, str] | None = None
        self._active_submission_link = False
        self._active_creator_profile: dict[str, str] | None = None
        self._active_tag_text: str | None = None

        # Nesting depth inside each tracked region (0 = outside).
        self._links_depth = 0
        self._built_with_depth = 0
        self._submissions_depth = 0
        self._team_depth = 0

    def _shift_section_depths(self, step: int) -> None:
        """Add *step* (+1 or -1) to the depth of every region currently open."""
        if self._links_depth:
            self._links_depth += step
        if self._built_with_depth:
            self._built_with_depth += step
        if self._submissions_depth:
            self._submissions_depth += step
        if self._team_depth:
            self._team_depth += step

    def handle_starttag(self, tag: str, attrs_list: list[tuple[str, str | None]]) -> None:
        """Update region depths and begin capturing whatever *tag* introduces."""
        attrs = {k: (v or "") for k, v in attrs_list}

        if tag not in self._VOID_TAGS:
            self._shift_section_depths(1)

        if tag == "meta":
            self._parse_meta(attrs)
            return

        # Only the first <h1> / <h3> with text supplies name / tagline.
        if tag == "h1" and not self.name:
            self._in_h1 = True
        if tag == "h3" and self.tagline is None:
            self._in_h3 = True

        class_value = attrs.get("class", "")
        element_id = attrs.get("id", "")

        if tag == "img":
            raw_src = attrs.get("src") or attrs.get("data-src")
            if raw_src:
                image_url = self._normalize_url(raw_src)
                if image_url:
                    self._add_image(image_url)
                    # An image inside a creator link is that creator's photo.
                    if self._active_creator_profile is not None:
                        self._active_creator_profile["photo"] = image_url

        # Entering one of the tracked regions.
        if tag == "nav" and "app-links" in class_value:
            self._links_depth = 1
        if element_id == "built-with":
            self._built_with_depth = 1
        if element_id == "submissions":
            self._submissions_depth = 1
        if element_id == "app-team":
            self._team_depth = 1

        href = attrs.get("href")
        if tag == "a" and href:
            if self._links_depth:
                self._active_link = {"href": href, "text": ""}
            if self._submissions_depth:
                self._active_submission_link = True
            if self._team_depth and "user-profile-link" in class_value:
                self._active_creator_profile = {"href": href, "name": ""}
            if self._built_with_depth:
                self._active_tag_text = ""
        elif tag == "span" and self._built_with_depth and "cp-tag" in class_value:
            self._active_tag_text = ""

    def handle_endtag(self, tag: str) -> None:
        """Finish whatever *tag* was capturing and update region depths."""
        if tag == "h1":
            self._in_h1 = False
        if tag == "h3":
            self._in_h3 = False
        if tag == "a":
            self._finalize_link()
            self._finalize_creator_profile()
            self._active_submission_link = False
            self._finalize_tag()
        if tag == "span":
            self._finalize_tag()

        # "<img/>" style tags reach this method through handle_startendtag;
        # their start was not counted, so the end must not be counted either.
        if tag not in self._VOID_TAGS:
            self._shift_section_depths(-1)

    def handle_data(self, data: str) -> None:
        """Route whitespace-collapsed text to every capture that is active."""
        text = " ".join(data.split())
        if not text:
            return

        if self._in_h1 and not self.name:
            self.name = text
        elif self._in_h3 and self.tagline is None:
            self.tagline = text

        if self._active_link is not None:
            current = self._active_link.get("text", "")
            self._active_link["text"] = f"{current} {text}".strip()

        if self._active_submission_link:
            if text not in self.submitted_to:
                self.submitted_to.append(text)

        if self._active_creator_profile is not None:
            current = self._active_creator_profile.get("name", "")
            self._active_creator_profile["name"] = f"{current} {text}".strip()

        if self._active_tag_text is not None:
            self._active_tag_text = f"{self._active_tag_text} {text}".strip()

    def _parse_meta(self, attrs: dict[str, str]) -> None:
        """Read og:title, og:description, og:image and keywords from a <meta> tag."""
        prop = attrs.get("property", "")
        content = attrs.get("content", "")
        if prop == "og:title" and content:
            self.og_title = content.removesuffix(" - Devpost")
        if prop == "og:description" and content and self.description is None:
            self.description = content
        if prop == "og:image" and content:
            image_url = self._normalize_url(content)
            if image_url:
                self.hero_image = image_url
                self._add_image(image_url)
        if attrs.get("name") == "keywords" and content:
            for value in content.split(","):
                self._add_tag(value)

    def _finalize_link(self) -> None:
        """Store the link being captured, if any, as an ``ExternalLink``."""
        if self._active_link is None:
            return

        href = self._normalize_url(self._active_link.get("href", "").strip())
        label = self._active_link.get("text", "").strip() or None
        self._active_link = None

        if not href:
            return
        parsed: ParseResult = urlparse(href)
        # Drop only a leading "www."; str.replace would remove the substring
        # anywhere in the host and could turn one host into another.
        domain = parsed.netloc.lower().removeprefix("www.") if parsed.netloc else None
        # Match github.com and its subdomains on label boundaries so that
        # hosts like "notgithub.com" are not flagged.
        is_github = domain is not None and (
            domain == "github.com" or domain.endswith(".github.com")
        )
        link = ExternalLink(url=href, label=label, domain=domain, is_github=is_github)
        if all(existing.url != link.url for existing in self.links):
            self.links.append(link)
        if is_github and href not in self.github_urls:
            self.github_urls.append(href)

    def _finalize_creator_profile(self) -> None:
        """Store the creator being captured, if any; nameless entries are dropped."""
        if self._active_creator_profile is None:
            return

        name = self._active_creator_profile.get("name", "").strip()
        profile_url = self._normalize_url(self._active_creator_profile.get("href", "").strip())
        photo = self._normalize_url(self._active_creator_profile.get("photo", "").strip())
        self._active_creator_profile = None

        if not name:
            return

        creator = CreatorProfile(name=name, profile_url=profile_url, photo=photo)
        # A profile is a duplicate only when both name and URL match.
        if all(existing.name != creator.name or existing.profile_url != creator.profile_url for existing in self.creator_profiles):
            self.creator_profiles.append(creator)
        if name not in self.creators:
            self.creators.append(name)

    def _normalize_url(self, raw_url: str) -> str:
        """Return *raw_url* as an absolute URL (``""`` for empty input).

        Protocol-relative URLs get ``https:``; other non-http(s) values are
        resolved against ``source_url``.
        """
        if not raw_url:
            return ""
        value = raw_url.strip()
        if value.startswith("//"):
            return f"https:{value}"
        if value.startswith("http://") or value.startswith("https://"):
            return value
        return urljoin(self.source_url, value)

    def _add_image(self, image_url: str) -> None:
        """Record *image_url* once, skipping favicon/avatar/icon/logo style URLs."""
        lowered = image_url.lower()
        if any(token in lowered for token in ("favicon", "avatar", "gravatar", "/icons/", "logo.svg")):
            return
        if image_url not in self.image_urls:
            self.image_urls.append(image_url)

    def _finalize_tag(self) -> None:
        """Store the tag text being captured, if any."""
        if self._active_tag_text is None:
            return
        self._add_tag(self._active_tag_text)
        self._active_tag_text = None

    def _add_tag(self, raw_value: str) -> None:
        """Record a tag once (case-insensitively), ignoring section headings."""
        value = " ".join(raw_value.split()).strip()
        if not value:
            return
        lowered = value.lower()
        if lowered in {"built with", "try it out", "created by"}:
            return
        if all(existing.lower() != lowered for existing in self.tags):
            self.tags.append(value)
|
devpost_api/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|