devpost-api 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
devpost_api/models.py ADDED
@@ -0,0 +1,203 @@
1
+ """Typed models used by the Devpost API wrapper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+
9
@dataclass
class SoftwareSummary:
    """Summary record for a Devpost software project.

    Instances are normally built with :meth:`from_json` from a decoded JSON
    object.
    """

    # Required identity fields; ``from_json`` falls back to 0 / "" for these.
    id: int
    name: str
    slug: str
    url: str
    # Optional fields; ``None`` when absent or of an unexpected type.
    tagline: str | None = None
    description: str | None = None
    publicly_visible: bool | None = None
    thumbnail: str | None = None

    @classmethod
    def from_json(cls, payload: dict[str, Any]) -> SoftwareSummary:
        """Build a summary from a decoded JSON object.

        Args:
            payload: Mapping read for the keys ``id``, ``name``, ``slug``,
                ``url``, ``tagline``, ``description``, ``publicly_visible``
                and ``thumbnail``.

        Returns:
            A new ``SoftwareSummary``. A missing, ``null`` or non-numeric
            ``id`` becomes ``0``; missing or ``null`` ``name``/``slug``/``url``
            become ``""``.
        """
        # ``payload.get(key, default)`` only applies the default when the key
        # is absent, so an explicit JSON null would reach int()/str() and
        # either raise or produce the literal string "None". ``or`` covers
        # both the missing and the null case.
        return cls(
            id=_maybe_int(payload.get("id")) or 0,
            name=str(payload.get("name") or ""),
            slug=str(payload.get("slug") or ""),
            url=str(payload.get("url") or ""),
            tagline=_maybe_str(payload.get("tagline")),
            description=_maybe_str(payload.get("description")),
            publicly_visible=_maybe_bool(payload.get("publicly_visible")),
            thumbnail=_maybe_str(payload.get("thumbnail")),
        )
32
+
33
+
34
@dataclass
class UserSummary:
    """Summary record for a Devpost user."""

    # Required; ``from_json`` falls back to "" when missing or null.
    screen_name: str
    # Optional fields; ``None`` when absent or not a string.
    full_name: str | None = None
    photo: str | None = None

    @classmethod
    def from_json(cls, payload: dict[str, Any]) -> UserSummary:
        """Build a user summary from a decoded JSON object.

        Args:
            payload: Mapping read for the keys ``screen_name``, ``full_name``
                and ``photo``.

        Returns:
            A new ``UserSummary``; a missing or ``null`` ``screen_name``
            becomes ``""``.
        """
        return cls(
            # ``or ""`` also handles an explicit JSON null, which
            # ``.get(key, "")`` would pass through and str() would turn
            # into the literal "None".
            screen_name=str(payload.get("screen_name") or ""),
            full_name=_maybe_str(payload.get("full_name")),
            photo=_maybe_str(payload.get("photo")),
        )
47
+
48
+
49
@dataclass
class HackathonSummary:
    """Summary record for a Devpost hackathon."""

    # Required identity fields; ``from_json`` falls back to 0 / "" for these.
    id: int
    title: str
    # Optional fields; ``None`` when absent or of an unexpected type.
    open_state: str | None = None
    url: str | None = None
    # Taken from the nested ``displayed_location.location`` value.
    location: str | None = None
    thumbnail_url: str | None = None
    submission_period_dates: str | None = None
    registrations_count: int | None = None
    prize_amount: str | None = None
    # Unique theme names in first-seen order.
    themes: list[str] = field(default_factory=list)

    @classmethod
    def from_json(cls, payload: dict[str, Any]) -> HackathonSummary:
        """Build a hackathon summary from a decoded JSON object.

        Args:
            payload: Mapping read for the keys ``id``, ``title``,
                ``open_state``, ``url``, ``displayed_location``,
                ``thumbnail_url``, ``submission_period_dates``,
                ``registrations_count``, ``prize_amount`` and ``themes``.

        Returns:
            A new ``HackathonSummary``. A missing, ``null`` or non-numeric
            ``id`` becomes ``0``; a missing or ``null`` ``title`` becomes
            ``""``.
        """
        displayed = payload.get("displayed_location")
        raw_location = displayed.get("location") if isinstance(displayed, dict) else None

        # ``.get(key, default)`` does not cover an explicit JSON null, so the
        # required fields are coerced with ``or`` instead.
        return cls(
            id=_maybe_int(payload.get("id")) or 0,
            title=str(payload.get("title") or ""),
            open_state=_maybe_str(payload.get("open_state")),
            url=_maybe_str(payload.get("url")),
            location=_maybe_str(raw_location),
            thumbnail_url=_maybe_str(payload.get("thumbnail_url")),
            submission_period_dates=_maybe_str(payload.get("submission_period_dates")),
            registrations_count=_maybe_int(payload.get("registrations_count")),
            prize_amount=_maybe_str(payload.get("prize_amount")),
            themes=_parse_theme_names(payload.get("themes")),
        )
83
+
84
+
85
@dataclass
class HackathonProjectsPage:
    """One page of a hackathon's project gallery."""

    # Base URL of the hackathon the gallery belongs to.
    hackathon_url: str
    # Page number this record describes.
    page: int
    # Number of gallery pages known for the hackathon.
    total_pages: int
    # Project URLs found on this page.
    project_urls: list[str] = field(default_factory=list)
91
+
92
+
93
@dataclass
class ExternalLink:
    """A hyperlink collected from a project page."""

    # Absolute target of the link.
    url: str
    # Visible link text, if any.
    label: str | None = None
    # Host part of ``url``, if it has one.
    domain: str | None = None
    # Flag marking links that point at GitHub.
    is_github: bool = False
99
+
100
+
101
@dataclass
class CreatorProfile:
    """A project team member as listed on a project page."""

    # Display name of the creator.
    name: str
    # Link to the creator's profile, if known.
    profile_url: str | None = None
    # URL of the creator's picture, if known.
    photo: str | None = None
106
+
107
+
108
@dataclass
class GitHubRepo:
    """Repository metadata extracted from a GitHub repository JSON object."""

    # Required identity fields; ``from_json`` falls back to "" for these.
    owner: str
    name: str
    full_name: str
    html_url: str
    # Optional fields; ``None`` when absent or of an unexpected type.
    description: str | None = None
    stargazers_count: int | None = None
    forks_count: int | None = None
    open_issues_count: int | None = None
    language: str | None = None
    # Taken from the nested ``license.name`` value.
    license_name: str | None = None
    default_branch: str | None = None
    updated_at: str | None = None

    @classmethod
    def from_json(cls, payload: dict[str, Any]) -> GitHubRepo:
        """Build a repository record from a decoded JSON object.

        Args:
            payload: Mapping read for the keys ``owner`` (nested ``login``),
                ``full_name``, ``name``, ``html_url``, ``description``,
                ``stargazers_count``, ``forks_count``, ``open_issues_count``,
                ``language``, ``license`` (nested ``name``),
                ``default_branch`` and ``updated_at``.

        Returns:
            A new ``GitHubRepo``. Missing or ``null`` ``full_name``, ``name``
            and ``html_url`` become ``""``. The owner falls back to the part
            of ``full_name`` before the first ``/`` when ``owner.login`` is
            not a string.
        """
        # ``or ""`` also handles an explicit JSON null, which
        # ``.get(key, "")`` would pass through and str() would turn into the
        # literal "None".
        full_name = str(payload.get("full_name") or "")
        repo_name = str(payload.get("name") or "")

        owner_payload = payload.get("owner")
        login = owner_payload.get("login") if isinstance(owner_payload, dict) else None
        owner_name = login if isinstance(login, str) else ""
        if not owner_name and "/" in full_name:
            owner_name = full_name.split("/", 1)[0]

        license_payload = payload.get("license")
        license_value = license_payload.get("name") if isinstance(license_payload, dict) else None

        return cls(
            owner=owner_name,
            name=repo_name,
            full_name=full_name,
            html_url=str(payload.get("html_url") or ""),
            description=_maybe_str(payload.get("description")),
            stargazers_count=_maybe_int(payload.get("stargazers_count")),
            forks_count=_maybe_int(payload.get("forks_count")),
            open_issues_count=_maybe_int(payload.get("open_issues_count")),
            language=_maybe_str(payload.get("language")),
            license_name=_maybe_str(license_value),
            default_branch=_maybe_str(payload.get("default_branch")),
            updated_at=_maybe_str(payload.get("updated_at")),
        )
156
+
157
+
158
@dataclass
class ProjectDetails:
    """Everything collected about a single project page."""

    # URL the details were parsed from.
    source_url: str
    # Project slug derived from the URL.
    slug: str
    # Project title ("" when none was found).
    name: str
    # Numeric software id, when one could be found.
    software_id: int | None = None
    tagline: str | None = None
    description: str | None = None
    # Main preview image, if any.
    hero_image: str | None = None
    # All list fields default to a fresh empty list per instance.
    image_urls: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    submitted_to: list[str] = field(default_factory=list)
    # Creator display names.
    creators: list[str] = field(default_factory=list)
    # Structured creator records.
    creator_profiles: list[CreatorProfile] = field(default_factory=list)
    links: list[ExternalLink] = field(default_factory=list)
    github_urls: list[str] = field(default_factory=list)
    github_repos: list[GitHubRepo] = field(default_factory=list)
175
+
176
+
177
+ def _maybe_str(value: Any) -> str | None:
178
+ return value if isinstance(value, str) else None
179
+
180
+
181
+ def _maybe_bool(value: Any) -> bool | None:
182
+ return value if isinstance(value, bool) else None
183
+
184
+
185
+ def _maybe_int(value: Any) -> int | None:
186
+ try:
187
+ return int(value) if value is not None else None
188
+ except (TypeError, ValueError):
189
+ return None
190
+
191
+
192
+ def _parse_theme_names(value: Any) -> list[str]:
193
+ if not isinstance(value, list):
194
+ return []
195
+
196
+ names: list[str] = []
197
+ for item in value:
198
+ if not isinstance(item, dict):
199
+ continue
200
+ name = item.get("name")
201
+ if isinstance(name, str) and name and name not in names:
202
+ names.append(name)
203
+ return names
devpost_api/parsing.py ADDED
@@ -0,0 +1,404 @@
1
+ """HTML and URL parsing helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from collections.abc import Iterable
7
+ from html.parser import HTMLParser
8
+ from urllib.parse import ParseResult, urljoin, urlparse, urlunparse
9
+
10
+ from .exceptions import GitHubResolutionError
11
+ from .models import CreatorProfile, ExternalLink, HackathonProjectsPage, ProjectDetails
12
+
13
+ DEVPOST_PROJECT_PREFIX = "https://devpost.com/software/"
14
+ DEVPOST_DOMAIN = "devpost.com"
15
+ _GITHUB_HREF_RE = re.compile(
16
+ r"""href=["'](https?://github\.com/[^/\s"'<>]+/[^"\s'<>]+)["']"""
17
+ )
18
+ _SOFTWARE_ID_RE = re.compile(r"software_id=(\d+)")
19
+ _HREF_RE = re.compile(r"""href=["']([^"'<>]+)["']""")
20
+ _GALLERY_PAGE_RE = re.compile(r"project-gallery\?page=(\d+)")
21
+
22
+
23
def to_project_url(slug_or_url: str) -> str:
    """Normalize a project slug/path/url to an absolute Devpost project URL.

    Args:
        slug_or_url: An absolute ``http(s)`` URL (returned as-is after
            trimming whitespace), a ``software/<slug>`` path, or a bare slug.

    Returns:
        The absolute project URL.

    Raises:
        ValueError: If the input is empty or contains no slug (for example
            ``"/"``), which previously produced the bare prefix URL.
    """
    value = slug_or_url.strip()
    if not value:
        raise ValueError("slug_or_url cannot be empty")

    parsed = urlparse(value)
    if parsed.scheme in {"http", "https"} and parsed.netloc:
        return value

    # Accept "/software/<slug>/", "software/<slug>" and "<slug>" alike.
    slug = value.strip("/").removeprefix("software/")
    if not slug:
        # Input consisted only of slashes; there is nothing to link to.
        raise ValueError("slug_or_url cannot be empty")
    return f"{DEVPOST_PROJECT_PREFIX}{slug}"
37
+
38
+
39
def to_hackathon_url(slug_or_url: str) -> str:
    """Normalize a hackathon slug/domain/url to an absolute hackathon base URL.

    Args:
        slug_or_url: An absolute ``http(s)`` URL, a host name such as
            ``myhack.devpost.com`` (optionally followed by a path), or a bare
            hackathon slug.

    Returns:
        ``<scheme>://<host>`` with no path, query or fragment. Bare slugs
        (hosts without a dot) are placed under the Devpost domain.

    Raises:
        ValueError: If the input is empty or no host can be determined.
    """
    value = slug_or_url.strip().strip("/")
    if not value:
        raise ValueError("slug_or_url cannot be empty")

    parsed = urlparse(value)
    if parsed.scheme in {"http", "https"} and parsed.netloc:
        return f"{parsed.scheme}://{parsed.netloc}"

    # Scheme-less input: parse it as if it were an https URL so that any
    # trailing path/query ("myhack.devpost.com/project-gallery") is dropped
    # and only the host remains, as the "base URL" contract requires.
    host = urlparse(f"https://{value}").netloc
    if not host:
        raise ValueError(f"Cannot determine hackathon host from: {slug_or_url!r}")

    if "." not in host:
        # A bare slug names a Devpost subdomain.
        return f"https://{host}.{DEVPOST_DOMAIN}"
    return f"https://{host}"
56
+
57
+
58
def hackathon_gallery_url(hackathon_slug_or_url: str, page: int = 1) -> str:
    """Return the project-gallery URL of a hackathon for the given page.

    Pages ``<= 1`` map to the gallery URL without a ``page`` query parameter.
    """
    gallery = to_hackathon_url(hackathon_slug_or_url).rstrip("/") + "/project-gallery"
    return gallery if page <= 1 else f"{gallery}?page={page}"
63
+
64
+
65
def parse_hackathon_gallery_page(hackathon_url: str, page: int, html: str) -> HackathonProjectsPage:
    """Parse one gallery page's HTML into a ``HackathonProjectsPage``.

    ``total_pages`` is the highest page number found in pagination links; when
    the HTML contains none it falls back to ``max(1, page)``.
    """
    found_projects = _extract_project_urls_from_gallery_html(html)
    linked_pages = [int(hit.group(1)) for hit in _GALLERY_PAGE_RE.finditer(html)]
    if linked_pages:
        last_page = max(linked_pages)
    else:
        last_page = max(1, page)

    return HackathonProjectsPage(
        hackathon_url=to_hackathon_url(hackathon_url),
        page=page,
        total_pages=last_page,
        project_urls=found_projects,
    )
75
+
76
+
77
def slug_from_project_url(url: str) -> str:
    """Return the project slug contained in *url*.

    The URL path is stripped of surrounding slashes; a leading ``software/``
    segment is removed when present, otherwise the whole path is returned.
    """
    trimmed = urlparse(url).path.strip("/")
    head, slash, remainder = trimmed.partition("/")
    if head == "software" and slash:
        return remainder
    return trimmed
83
+
84
+
85
def github_owner_repo(url_or_owner_repo: str) -> tuple[str, str]:
    """Return (owner, repo) from a GitHub URL or owner/repo string.

    Args:
        url_or_owner_repo: ``owner/repo``, a GitHub URL, or a scheme-less
            ``github.com/owner/repo`` reference. A trailing ``.git`` on the
            repository name is removed.

    Returns:
        The ``(owner, repo)`` pair.

    Raises:
        GitHubResolutionError: If the input is empty, is not hosted on
            ``github.com`` (or a subdomain of it), or lacks an owner/repo
            path.
    """
    value = url_or_owner_repo.strip()
    if not value:
        raise GitHubResolutionError("Empty GitHub identifier")

    has_scheme = "://" in value
    if not has_scheme and value.count("/") == 1:
        owner, repo = value.split("/", 1)
        repo = repo.removesuffix(".git")
        if owner and repo:
            return owner, repo

    try:
        # Scheme-less references such as "github.com/owner/repo" only expose
        # a host to urlparse once a scheme is present.
        parsed = urlparse(value if has_scheme else f"https://{value}")
    except ValueError as err:
        raise GitHubResolutionError(
            f"Cannot resolve GitHub owner/repo from: {value}"
        ) from err

    # Match the host exactly or as a true subdomain; a plain
    # endswith("github.com") would also accept hosts like "notgithub.com".
    host = parsed.hostname or ""
    if host == "github.com" or host.endswith(".github.com"):
        parts = [segment for segment in parsed.path.split("/") if segment]
        if len(parts) >= 2:
            owner = parts[0]
            repo = parts[1].removesuffix(".git")
            if owner and repo:
                return owner, repo
    raise GitHubResolutionError(f"Cannot resolve GitHub owner/repo from: {value}")
107
+
108
+
109
def parse_project_details(url: str, html: str) -> ProjectDetails:
    """Parse a project page's HTML into a ``ProjectDetails`` record.

    Most fields come from a ``_ProjectParser`` pass over *html*. The software
    id is taken from the first ``software_id=<digits>`` occurrence, and GitHub
    URLs found by the parser are merged with those found by a regex scan.
    ``github_repos`` is left at its default.
    """
    page = _ProjectParser(url)
    page.feed(html)
    page.close()

    id_hit = _SOFTWARE_ID_RE.search(html)
    software_id = int(id_hit.group(1)) if id_hit else None

    # Parser-discovered URLs come first so their order wins when deduplicating.
    merged_github = _unique(page.github_urls + _extract_github_urls(html))

    return ProjectDetails(
        source_url=url,
        slug=slug_from_project_url(url),
        name=page.name or page.og_title or "",
        software_id=software_id,
        tagline=page.tagline,
        description=page.description,
        hero_image=page.hero_image,
        image_urls=page.image_urls,
        tags=page.tags,
        submitted_to=page.submitted_to,
        creators=page.creators,
        creator_profiles=page.creator_profiles,
        links=page.links,
        github_urls=merged_github,
    )
137
+
138
+
139
def _extract_github_urls(html: str) -> list[str]:
    """Return unique GitHub URLs found in href attributes, in document order.

    Trailing ``)``, ``.``, ``,`` and ``;`` characters are stripped from each
    match.
    """
    cleaned = (hit.group(1).rstrip(").,;") for hit in _GITHUB_HREF_RE.finditer(html))
    return _unique(cleaned)
141
+
142
+
143
def _extract_project_urls_from_gallery_html(html: str) -> list[str]:
    """Return unique canonical project URLs linked from gallery HTML.

    Every href value is trimmed, stripped of trailing ``)``, ``.``, ``,`` and
    ``;``, and kept only when it normalizes to a Devpost software URL.
    """
    hrefs = (hit.group(1).strip().rstrip(").,;") for hit in _HREF_RE.finditer(html))
    normalized = (_normalize_devpost_software_href(href) for href in hrefs if href)
    return _unique(url for url in normalized if url)
153
+
154
+
155
+ def _normalize_devpost_software_href(href: str) -> str | None:
156
+ if href.startswith("//"):
157
+ candidate = f"https:{href}"
158
+ elif href.startswith("/"):
159
+ candidate = urljoin("https://devpost.com", href)
160
+ elif href.startswith("http://") or href.startswith("https://"):
161
+ candidate = href
162
+ else:
163
+ return None
164
+
165
+ parsed = urlparse(candidate)
166
+ netloc = parsed.netloc.lower().replace("www.", "")
167
+ if netloc != DEVPOST_DOMAIN:
168
+ return None
169
+ if not parsed.path.startswith("/software/"):
170
+ return None
171
+ slug = parsed.path.removeprefix("/software/").strip("/")
172
+ if not slug:
173
+ return None
174
+ clean_path = f"/software/{slug.split('/', 1)[0]}"
175
+ return urlunparse(("https", DEVPOST_DOMAIN, clean_path, "", "", ""))
176
+
177
+
178
+ def _unique(values: Iterable[str]) -> list[str]:
179
+ seen: set[str] = set()
180
+ out: list[str] = []
181
+ for value in values:
182
+ if value in seen:
183
+ continue
184
+ seen.add(value)
185
+ out.append(value)
186
+ return out
187
+
188
+
189
+ class _ProjectParser(HTMLParser):
190
+ def __init__(self, source_url: str) -> None:
191
+ super().__init__(convert_charrefs=True)
192
+ self.source_url = source_url
193
+ self.name = ""
194
+ self.tagline: str | None = None
195
+ self.description: str | None = None
196
+ self.og_title = ""
197
+ self.hero_image: str | None = None
198
+ self.image_urls: list[str] = []
199
+ self.tags: list[str] = []
200
+ self.submitted_to: list[str] = []
201
+ self.creators: list[str] = []
202
+ self.creator_profiles: list[CreatorProfile] = []
203
+ self.links: list[ExternalLink] = []
204
+ self.github_urls: list[str] = []
205
+
206
+ self._in_h1 = False
207
+ self._in_h3 = False
208
+ self._active_link: dict[str, str] | None = None
209
+ self._active_submission_link = False
210
+ self._active_creator_profile: dict[str, str] | None = None
211
+ self._active_tag_text: str | None = None
212
+
213
+ self._links_depth = 0
214
+ self._built_with_depth = 0
215
+ self._submissions_depth = 0
216
+ self._team_depth = 0
217
+
218
+ def handle_starttag(self, tag: str, attrs_list: list[tuple[str, str | None]]) -> None:
219
+ attrs = {k: (v or "") for k, v in attrs_list}
220
+
221
+ if self._links_depth:
222
+ self._links_depth += 1
223
+ if self._built_with_depth:
224
+ self._built_with_depth += 1
225
+ if self._submissions_depth:
226
+ self._submissions_depth += 1
227
+ if self._team_depth:
228
+ self._team_depth += 1
229
+
230
+ if tag == "meta":
231
+ self._parse_meta(attrs)
232
+ return
233
+
234
+ if tag == "h1" and not self.name:
235
+ self._in_h1 = True
236
+ if tag == "h3" and self.tagline is None:
237
+ self._in_h3 = True
238
+
239
+ class_value = attrs.get("class", "")
240
+ element_id = attrs.get("id", "")
241
+
242
+ if tag == "img":
243
+ raw_src = attrs.get("src") or attrs.get("data-src")
244
+ if raw_src:
245
+ image_url = self._normalize_url(raw_src)
246
+ if image_url:
247
+ self._add_image(image_url)
248
+ if self._active_creator_profile is not None:
249
+ self._active_creator_profile["photo"] = image_url
250
+
251
+ if tag == "nav" and "app-links" in class_value:
252
+ self._links_depth = 1
253
+ if element_id == "built-with":
254
+ self._built_with_depth = 1
255
+ if element_id == "submissions":
256
+ self._submissions_depth = 1
257
+ if element_id == "app-team":
258
+ self._team_depth = 1
259
+
260
+ href = attrs.get("href")
261
+ if tag == "a" and href:
262
+ if self._links_depth:
263
+ self._active_link = {"href": href, "text": ""}
264
+ if self._submissions_depth:
265
+ self._active_submission_link = True
266
+ if self._team_depth and "user-profile-link" in class_value:
267
+ self._active_creator_profile = {"href": href, "name": ""}
268
+ if self._built_with_depth:
269
+ self._active_tag_text = ""
270
+ elif tag == "span" and self._built_with_depth and "cp-tag" in class_value:
271
+ self._active_tag_text = ""
272
+
273
+ def handle_endtag(self, tag: str) -> None:
274
+ if tag == "h1":
275
+ self._in_h1 = False
276
+ if tag == "h3":
277
+ self._in_h3 = False
278
+ if tag == "a":
279
+ self._finalize_link()
280
+ self._finalize_creator_profile()
281
+ self._active_submission_link = False
282
+ self._finalize_tag()
283
+ if tag == "span":
284
+ self._finalize_tag()
285
+
286
+ if self._links_depth:
287
+ self._links_depth -= 1
288
+ if self._built_with_depth:
289
+ self._built_with_depth -= 1
290
+ if self._submissions_depth:
291
+ self._submissions_depth -= 1
292
+ if self._team_depth:
293
+ self._team_depth -= 1
294
+
295
+ def handle_data(self, data: str) -> None:
296
+ text = " ".join(data.split())
297
+ if not text:
298
+ return
299
+
300
+ if self._in_h1 and not self.name:
301
+ self.name = text
302
+ elif self._in_h3 and self.tagline is None:
303
+ self.tagline = text
304
+
305
+ if self._active_link is not None:
306
+ current = self._active_link.get("text", "")
307
+ self._active_link["text"] = f"{current} {text}".strip()
308
+
309
+ if self._active_submission_link:
310
+ if text not in self.submitted_to:
311
+ self.submitted_to.append(text)
312
+
313
+ if self._active_creator_profile is not None:
314
+ current = self._active_creator_profile.get("name", "")
315
+ self._active_creator_profile["name"] = f"{current} {text}".strip()
316
+
317
+ if self._active_tag_text is not None:
318
+ self._active_tag_text = f"{self._active_tag_text} {text}".strip()
319
+
320
+ def _parse_meta(self, attrs: dict[str, str]) -> None:
321
+ prop = attrs.get("property", "")
322
+ content = attrs.get("content", "")
323
+ if prop == "og:title" and content:
324
+ self.og_title = content.removesuffix(" - Devpost")
325
+ if prop == "og:description" and content and self.description is None:
326
+ self.description = content
327
+ if prop == "og:image" and content:
328
+ image_url = self._normalize_url(content)
329
+ if image_url:
330
+ self.hero_image = image_url
331
+ self._add_image(image_url)
332
+ if attrs.get("name") == "keywords" and content:
333
+ for value in content.split(","):
334
+ self._add_tag(value)
335
+
336
+ def _finalize_link(self) -> None:
337
+ if self._active_link is None:
338
+ return
339
+
340
+ href = self._normalize_url(self._active_link.get("href", "").strip())
341
+ label = self._active_link.get("text", "").strip() or None
342
+ self._active_link = None
343
+
344
+ if not href:
345
+ return
346
+ parsed: ParseResult = urlparse(href)
347
+ domain = parsed.netloc.lower().replace("www.", "") if parsed.netloc else None
348
+ is_github = bool(domain and domain.endswith("github.com"))
349
+ link = ExternalLink(url=href, label=label, domain=domain, is_github=is_github)
350
+ if all(existing.url != link.url for existing in self.links):
351
+ self.links.append(link)
352
+ if is_github and href not in self.github_urls:
353
+ self.github_urls.append(href)
354
+
355
+ def _finalize_creator_profile(self) -> None:
356
+ if self._active_creator_profile is None:
357
+ return
358
+
359
+ name = self._active_creator_profile.get("name", "").strip()
360
+ profile_url = self._normalize_url(self._active_creator_profile.get("href", "").strip())
361
+ photo = self._normalize_url(self._active_creator_profile.get("photo", "").strip())
362
+ self._active_creator_profile = None
363
+
364
+ if not name:
365
+ return
366
+
367
+ creator = CreatorProfile(name=name, profile_url=profile_url, photo=photo)
368
+ if all(existing.name != creator.name or existing.profile_url != creator.profile_url for existing in self.creator_profiles):
369
+ self.creator_profiles.append(creator)
370
+ if name not in self.creators:
371
+ self.creators.append(name)
372
+
373
+ def _normalize_url(self, raw_url: str) -> str:
374
+ if not raw_url:
375
+ return ""
376
+ value = raw_url.strip()
377
+ if value.startswith("//"):
378
+ return f"https:{value}"
379
+ if value.startswith("http://") or value.startswith("https://"):
380
+ return value
381
+ return urljoin(self.source_url, value)
382
+
383
+ def _add_image(self, image_url: str) -> None:
384
+ lowered = image_url.lower()
385
+ if any(token in lowered for token in ("favicon", "avatar", "gravatar", "/icons/", "logo.svg")):
386
+ return
387
+ if image_url not in self.image_urls:
388
+ self.image_urls.append(image_url)
389
+
390
+ def _finalize_tag(self) -> None:
391
+ if self._active_tag_text is None:
392
+ return
393
+ self._add_tag(self._active_tag_text)
394
+ self._active_tag_text = None
395
+
396
+ def _add_tag(self, raw_value: str) -> None:
397
+ value = " ".join(raw_value.split()).strip()
398
+ if not value:
399
+ return
400
+ lowered = value.lower()
401
+ if lowered in {"built with", "try it out", "created by"}:
402
+ return
403
+ if all(existing.lower() != lowered for existing in self.tags):
404
+ self.tags.append(value)
devpost_api/py.typed ADDED
@@ -0,0 +1 @@
1
+