cli-web-producthunt 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. cli_web_producthunt-0.1.0/PKG-INFO +12 -0
  2. cli_web_producthunt-0.1.0/cli_web/producthunt/README.md +52 -0
  3. cli_web_producthunt-0.1.0/cli_web/producthunt/__init__.py +0 -0
  4. cli_web_producthunt-0.1.0/cli_web/producthunt/__main__.py +4 -0
  5. cli_web_producthunt-0.1.0/cli_web/producthunt/commands/__init__.py +0 -0
  6. cli_web_producthunt-0.1.0/cli_web/producthunt/commands/posts.py +113 -0
  7. cli_web_producthunt-0.1.0/cli_web/producthunt/commands/users.py +33 -0
  8. cli_web_producthunt-0.1.0/cli_web/producthunt/core/__init__.py +0 -0
  9. cli_web_producthunt-0.1.0/cli_web/producthunt/core/client.py +301 -0
  10. cli_web_producthunt-0.1.0/cli_web/producthunt/core/exceptions.py +50 -0
  11. cli_web_producthunt-0.1.0/cli_web/producthunt/core/models.py +101 -0
  12. cli_web_producthunt-0.1.0/cli_web/producthunt/producthunt_cli.py +130 -0
  13. cli_web_producthunt-0.1.0/cli_web/producthunt/skills/SKILL.md +80 -0
  14. cli_web_producthunt-0.1.0/cli_web/producthunt/tests/TEST.md +129 -0
  15. cli_web_producthunt-0.1.0/cli_web/producthunt/tests/__init__.py +0 -0
  16. cli_web_producthunt-0.1.0/cli_web/producthunt/tests/test_core.py +395 -0
  17. cli_web_producthunt-0.1.0/cli_web/producthunt/tests/test_e2e.py +176 -0
  18. cli_web_producthunt-0.1.0/cli_web/producthunt/utils/__init__.py +0 -0
  19. cli_web_producthunt-0.1.0/cli_web/producthunt/utils/doctor.py +188 -0
  20. cli_web_producthunt-0.1.0/cli_web/producthunt/utils/helpers.py +39 -0
  21. cli_web_producthunt-0.1.0/cli_web/producthunt/utils/mcp_server.py +290 -0
  22. cli_web_producthunt-0.1.0/cli_web/producthunt/utils/output.py +40 -0
  23. cli_web_producthunt-0.1.0/cli_web/producthunt/utils/repl_skin.py +486 -0
  24. cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/PKG-INFO +12 -0
  25. cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/SOURCES.txt +29 -0
  26. cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/dependency_links.txt +1 -0
  27. cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/entry_points.txt +2 -0
  28. cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/requires.txt +4 -0
  29. cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/top_level.txt +1 -0
  30. cli_web_producthunt-0.1.0/setup.cfg +4 -0
  31. cli_web_producthunt-0.1.0/setup.py +16 -0
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: cli-web-producthunt
3
+ Version: 0.1.0
4
+ Summary: CLI for Product Hunt — browse launches, leaderboards, and product details
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: click>=8.0
7
+ Requires-Dist: curl_cffi
8
+ Requires-Dist: beautifulsoup4
9
+ Requires-Dist: prompt_toolkit>=3.0
10
+ Dynamic: requires-dist
11
+ Dynamic: requires-python
12
+ Dynamic: summary
@@ -0,0 +1,52 @@
1
+ # cli-web-producthunt
2
+
3
+ > Generated by [CLI-Anything-Web](../../../../cli-anything-web-plugin/) from [producthunt.com](https://www.producthunt.com)
4
+
5
+ CLI for browsing Product Hunt — today's top launches, leaderboards, product details, and user profiles.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ cd producthunt/agent-harness
11
+ pip install -e .
12
+ cli-web-producthunt --help
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ ```bash
18
+ # Today's top products
19
+ cli-web-producthunt posts list --json
20
+
21
+ # Product detail
22
+ cli-web-producthunt posts get <slug> --json
23
+
24
+ # Leaderboard (--period accepts daily/weekly/monthly, but only the daily leaderboard is currently scraped)
25
+ cli-web-producthunt posts leaderboard --json
26
+ cli-web-producthunt posts leaderboard --period weekly --json
27
+ cli-web-producthunt posts leaderboard --date 2026-03-15 --json
28
+
29
+ # User profile
30
+ cli-web-producthunt users get rrhoover --json
31
+ ```
32
+
33
+ ## Auth
34
+
35
+ **No authentication required.** The CLI scrapes public HTML using `curl_cffi` with Chrome TLS impersonation to bypass Cloudflare.
36
+
37
+ ## JSON Output
38
+
39
+ All commands support `--json` for structured output:
40
+
41
+ ```bash
42
+ cli-web-producthunt posts list --json | python -c "import sys,json; data=json.load(sys.stdin); print(f'{len(data)} products, top: {data[0][\"name\"]}')"
43
+ ```
44
+
45
+ ## Testing
46
+
47
+ ```bash
48
+ python -m pytest cli_web/producthunt/tests/ -v -s
49
+
50
+ # Subprocess tests
51
+ CLI_WEB_FORCE_INSTALLED=1 python -m pytest cli_web/producthunt/tests/ -v -s -k subprocess
52
+ ```
@@ -0,0 +1,4 @@
1
# Entry point for ``python -m cli_web.producthunt``: delegate straight to
# the CLI's ``main`` callable.
from .producthunt_cli import main

# Only run the CLI when executed as a script/module, never on plain import.
if __name__ == "__main__":
    main()
@@ -0,0 +1,113 @@
1
+ """Posts commands for cli-web-producthunt."""
2
+
3
+ import click
4
+
5
+ from ..core.client import ProductHuntClient
6
+ from ..utils.helpers import handle_errors
7
+ from ..utils.output import print_json, print_table
8
+
9
+
10
+ @click.group()
11
+ def posts():
12
+ """Browse Product Hunt posts and leaderboard."""
13
+
14
+
15
+ @posts.command("list")
16
+ @click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
17
+ def list_posts(use_json):
18
+ """List today's posts from the Product Hunt homepage."""
19
+ with handle_errors(json_mode=use_json):
20
+ client = ProductHuntClient()
21
+ results = client.list_posts()
22
+
23
+ if use_json:
24
+ print_json(results)
25
+ else:
26
+ if not results:
27
+ click.echo("No posts found.")
28
+ return
29
+ rows = []
30
+ for p in results:
31
+ d = p.to_dict()
32
+ rows.append(
33
+ [
34
+ d.get("slug", ""),
35
+ d.get("name", ""),
36
+ str(d.get("votes_count", "")),
37
+ str(d.get("comments_count", "")),
38
+ d.get("tagline", "")[:60],
39
+ ]
40
+ )
41
+ print_table(rows, ["Slug", "Name", "Votes", "Comments", "Tagline"])
42
+
43
+
44
+ @posts.command("get")
45
+ @click.argument("slug")
46
+ @click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
47
+ def get_post(slug, use_json):
48
+ """Get details for a specific product by slug."""
49
+ with handle_errors(json_mode=use_json):
50
+ client = ProductHuntClient()
51
+ post = client.get_post(slug=slug)
52
+
53
+ if use_json:
54
+ print_json(post)
55
+ else:
56
+ d = post.to_dict()
57
+ click.echo(f"Name: {d.get('name', '')}")
58
+ click.echo(f"Slug: {d.get('slug', '')}")
59
+ click.echo(f"Tagline: {d.get('tagline', '')}")
60
+ click.echo(f"Votes: {d.get('votes_count', '')}")
61
+ click.echo(f"Comments: {d.get('comments_count', '')}")
62
+ click.echo(f"URL: {d.get('url', '')}")
63
+ if d.get("description"):
64
+ click.echo(f"Description: {d['description']}")
65
+ if d.get("topics"):
66
+ click.echo(f"Topics: {', '.join(d['topics'])}")
67
+
68
+
69
+ @posts.command("leaderboard")
70
+ @click.option(
71
+ "--period",
72
+ type=click.Choice(["daily", "weekly", "monthly"], case_sensitive=False),
73
+ default="daily",
74
+ help="Leaderboard period (default: daily).",
75
+ )
76
+ @click.option("--date", "date_str", default=None, help="Date as YYYY-MM-DD (optional).")
77
+ @click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
78
+ def leaderboard(period, date_str, use_json):
79
+ """Show the Product Hunt leaderboard."""
80
+ year = month = day = None
81
+ if date_str:
82
+ parts = date_str.split("-")
83
+ if len(parts) >= 1:
84
+ year = int(parts[0])
85
+ if len(parts) >= 2:
86
+ month = int(parts[1])
87
+ if len(parts) >= 3:
88
+ day = int(parts[2])
89
+
90
+ with handle_errors(json_mode=use_json):
91
+ client = ProductHuntClient()
92
+ results = client.list_leaderboard(period=period.lower(), year=year, month=month, day=day)
93
+
94
+ if use_json:
95
+ print_json(results)
96
+ else:
97
+ if not results:
98
+ click.echo("No posts found on leaderboard.")
99
+ return
100
+ rows = []
101
+ for i, p in enumerate(results, 1):
102
+ d = p.to_dict()
103
+ rank = d.get("rank") or i
104
+ rows.append(
105
+ [
106
+ str(rank),
107
+ d.get("name", ""),
108
+ str(d.get("votes_count", "")),
109
+ str(d.get("comments_count", "")),
110
+ d.get("tagline", "")[:50],
111
+ ]
112
+ )
113
+ print_table(rows, ["#", "Name", "Votes", "Comments", "Tagline"])
@@ -0,0 +1,33 @@
1
+ """Users commands for cli-web-producthunt."""
2
+
3
+ import click
4
+
5
+ from ..core.client import ProductHuntClient
6
+ from ..utils.helpers import handle_errors
7
+ from ..utils.output import print_json
8
+
9
+
10
+ @click.group()
11
+ def users():
12
+ """Look up Product Hunt users."""
13
+
14
+
15
+ @users.command("get")
16
+ @click.argument("username")
17
+ @click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
18
+ def get_user(username, use_json):
19
+ """Get details for a user by username."""
20
+ with handle_errors(json_mode=use_json):
21
+ client = ProductHuntClient()
22
+ user = client.get_user(username=username)
23
+
24
+ if use_json:
25
+ print_json(user)
26
+ else:
27
+ d = user.to_dict()
28
+ click.echo(f"Username: {d.get('username', '')}")
29
+ click.echo(f"Name: {d.get('name', '')}")
30
+ click.echo(f"Headline: {d.get('headline', '')}")
31
+ click.echo(f"Followers: {d.get('followers_count', 0)}")
32
+ if d.get("website_url"):
33
+ click.echo(f"Website: {d['website_url']}")
@@ -0,0 +1,301 @@
1
+ """HTML-scraping client for Product Hunt using curl_cffi.
2
+
3
+ No API tokens or cookies required -- curl_cffi with Chrome TLS
4
+ impersonation bypasses Cloudflare protection automatically.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+
11
+ from bs4 import BeautifulSoup
12
+ from curl_cffi import requests as curl_requests
13
+
14
+ from .exceptions import (
15
+ AuthError,
16
+ NetworkError,
17
+ NotFoundError,
18
+ RateLimitError,
19
+ ServerError,
20
+ )
21
+ from .models import Post, User
22
+
23
+ BASE_URL = "https://www.producthunt.com"
24
+
25
+
26
+ class ProductHuntClient:
27
+ """Scrape Product Hunt pages with Chrome TLS impersonation."""
28
+
29
+ def __init__(self) -> None:
30
+ self._session = curl_requests.Session(impersonate="chrome131")
31
+
32
+ def close(self) -> None:
33
+ self._session.close()
34
+
35
+ def __enter__(self) -> ProductHuntClient:
36
+ return self
37
+
38
+ def __exit__(self, *exc) -> None:
39
+ self.close()
40
+
41
+ # ------------------------------------------------------------------
42
+ # Low-level transport
43
+ # ------------------------------------------------------------------
44
+
45
+ def _get(self, url: str) -> BeautifulSoup:
46
+ """Fetch *url* and return a parsed BeautifulSoup tree.
47
+
48
+ Maps HTTP status codes to domain exceptions.
49
+ """
50
+ try:
51
+ resp = self._session.get(url, timeout=30)
52
+ except Exception as exc:
53
+ raise NetworkError(f"Request failed: {exc}") from exc
54
+
55
+ status = resp.status_code
56
+ if status == 403:
57
+ raise AuthError(
58
+ "Blocked by Cloudflare (HTTP 403). Try again later.",
59
+ recoverable=True,
60
+ )
61
+ if status == 404:
62
+ raise NotFoundError(f"Page not found: {url}")
63
+ if status == 429:
64
+ retry_after = resp.headers.get("Retry-After")
65
+ raise RateLimitError(
66
+ "Rate limited by Product Hunt",
67
+ retry_after=float(retry_after) if retry_after else None,
68
+ )
69
+ if status >= 500:
70
+ raise ServerError(f"Server error (HTTP {status})", status_code=status)
71
+ if status != 200:
72
+ raise ServerError(f"Unexpected HTTP {status}: {url}", status_code=status)
73
+
74
+ return BeautifulSoup(resp.text, "html.parser")
75
+
76
+ # ------------------------------------------------------------------
77
+ # Shared card-parsing helper
78
+ # ------------------------------------------------------------------
79
+
80
+ @staticmethod
81
+ def _parse_post_cards(soup: BeautifulSoup) -> list[Post]:
82
+ """Extract Post objects from a page containing post-name-* cards."""
83
+ posts: list[Post] = []
84
+ post_names = soup.find_all(attrs={"data-test": re.compile(r"^post-name-")})
85
+
86
+ for card in post_names:
87
+ data_test = card.get("data-test", "")
88
+ post_id = data_test.replace("post-name-", "")
89
+
90
+ # Name and slug from the <a> link inside the card
91
+ link = card.find("a", href=True)
92
+ if not link:
93
+ continue
94
+ name = link.get_text(strip=True)
95
+ href = link["href"]
96
+ # href may be /posts/<slug> or /products/<slug>
97
+ slug = href.rsplit("/", 1)[-1] if "/" in href else href
98
+
99
+ # Tagline from the next sibling element
100
+ tagline_el = card.find_next_sibling()
101
+ tagline = tagline_el.get_text(strip=True) if tagline_el else ""
102
+
103
+ # Walk up to find the full card container (up to 8 levels)
104
+ container = card
105
+ for _ in range(8):
106
+ if container.parent:
107
+ container = container.parent
108
+ if container.get("data-test", "").startswith("post-item"):
109
+ break
110
+
111
+ # Votes and comments from <button> elements with numeric text
112
+ buttons = container.find_all("button")
113
+ nums = [
114
+ int(btn.get_text(strip=True))
115
+ for btn in buttons
116
+ if btn.get_text(strip=True).isdigit()
117
+ ]
118
+ comments_count = nums[0] if len(nums) >= 1 else 0
119
+ votes_count = nums[1] if len(nums) >= 2 else 0
120
+
121
+ # Topics from /topics/ links
122
+ topic_links = [
123
+ a.get_text(strip=True)
124
+ for a in container.find_all("a", href=lambda h: h and "/topics/" in h)
125
+ ]
126
+
127
+ # Thumbnail from <img> in the container
128
+ img = container.find("img", src=True)
129
+ thumbnail_url = img["src"] if img else None
130
+
131
+ posts.append(
132
+ Post.from_card(
133
+ {
134
+ "id": post_id,
135
+ "name": name,
136
+ "tagline": tagline,
137
+ "slug": slug,
138
+ "votes_count": votes_count,
139
+ "comments_count": comments_count,
140
+ "topics": topic_links,
141
+ "thumbnail_url": thumbnail_url,
142
+ }
143
+ )
144
+ )
145
+
146
+ return posts
147
+
148
+ # ------------------------------------------------------------------
149
+ # Posts
150
+ # ------------------------------------------------------------------
151
+
152
+ def list_posts(self) -> list[Post]:
153
+ """Scrape the Product Hunt homepage for today's posts."""
154
+ soup = self._get(BASE_URL)
155
+ return self._parse_post_cards(soup)
156
+
157
+ def get_post(self, slug: str) -> Post:
158
+ """Scrape a single product detail page."""
159
+ url = f"{BASE_URL}/products/{slug}"
160
+ soup = self._get(url)
161
+
162
+ # Title from <title> tag (usually "Name - Product Hunt")
163
+ title_tag = soup.find("title")
164
+ title = title_tag.get_text(strip=True) if title_tag else slug
165
+ # Clean up " - Product Hunt" or " | Product Hunt" suffix
166
+ for sep in (" - Product Hunt", " | Product Hunt"):
167
+ if title.endswith(sep):
168
+ title = title[: -len(sep)]
169
+
170
+ # Description from meta tag
171
+ meta_desc = soup.find("meta", attrs={"name": "description"})
172
+ description = meta_desc["content"] if meta_desc and meta_desc.get("content") else None
173
+
174
+ # Thumbnail from og:image
175
+ og_image = soup.find("meta", attrs={"property": "og:image"})
176
+ thumbnail_url = og_image["content"] if og_image and og_image.get("content") else None
177
+
178
+ # Try to extract votes/comments from the detail page
179
+ votes_count = 0
180
+ comments_count = 0
181
+ buttons = soup.find_all("button")
182
+ nums = [
183
+ int(btn.get_text(strip=True)) for btn in buttons if btn.get_text(strip=True).isdigit()
184
+ ]
185
+ if len(nums) >= 2:
186
+ comments_count = nums[0]
187
+ votes_count = nums[1]
188
+ elif len(nums) == 1:
189
+ votes_count = nums[0]
190
+
191
+ # Topics from /topics/ links
192
+ topics = [
193
+ a.get_text(strip=True) for a in soup.find_all("a", href=lambda h: h and "/topics/" in h)
194
+ ]
195
+
196
+ return Post(
197
+ id=slug,
198
+ name=title,
199
+ tagline=description or "",
200
+ slug=slug,
201
+ url=f"{BASE_URL}/products/{slug}",
202
+ description=description,
203
+ votes_count=votes_count,
204
+ comments_count=comments_count,
205
+ topics=topics,
206
+ thumbnail_url=thumbnail_url,
207
+ )
208
+
209
+ # ------------------------------------------------------------------
210
+ # Leaderboard
211
+ # ------------------------------------------------------------------
212
+
213
+ def list_leaderboard(
214
+ self,
215
+ period: str = "daily",
216
+ year: int | None = None,
217
+ month: int | None = None,
218
+ day: int | None = None,
219
+ ) -> list[Post]:
220
+ """Scrape the Product Hunt leaderboard.
221
+
222
+ *period* must be one of ``daily``, ``weekly``, ``monthly``.
223
+ Date components are optional; when omitted today's date is used
224
+ for ``daily``, or the plain ``/leaderboard`` page for others.
225
+
226
+ The only supported URL pattern is ``/leaderboard/daily/YYYY/M/D``.
227
+ Product Hunt does not expose weekly or monthly leaderboard pages
228
+ as scrapable lists, so *period* is accepted for API compatibility
229
+ but always resolves to the daily leaderboard.
230
+ """
231
+ if year is not None and month is not None and day is not None:
232
+ url = f"{BASE_URL}/leaderboard/daily/{year}/{month}/{day}"
233
+ else:
234
+ # Default to today
235
+ from datetime import date as _date
236
+
237
+ today = _date.today()
238
+ url = f"{BASE_URL}/leaderboard/daily/{today.year}/{today.month}/{today.day}"
239
+
240
+ soup = self._get(url)
241
+ return self._parse_post_cards(soup)
242
+
243
+ # ------------------------------------------------------------------
244
+ # Users
245
+ # ------------------------------------------------------------------
246
+
247
+ def get_user(self, username: str) -> User:
248
+ """Scrape a user's public profile page."""
249
+ url = f"{BASE_URL}/@{username}"
250
+ soup = self._get(url)
251
+
252
+ # Name — try og:title first (usually cleaner), then <title>
253
+ og_title = soup.find("meta", attrs={"property": "og:title"})
254
+ if og_title and og_title.get("content"):
255
+ name = og_title["content"]
256
+ else:
257
+ title_tag = soup.find("title")
258
+ name = title_tag.get_text(strip=True) if title_tag else ""
259
+
260
+ # Clean suffixes like " - Product Hunt", "'s profile on Product Hunt"
261
+ for suffix in (
262
+ " - Product Hunt",
263
+ " | Product Hunt",
264
+ "'s profile on Product Hunt",
265
+ ):
266
+ if name.endswith(suffix):
267
+ name = name[: -len(suffix)]
268
+ # Strip "(@username)" if present
269
+ paren = f"(@{username})"
270
+ if paren in name:
271
+ name = name.replace(paren, "").strip()
272
+ # Strip leading/trailing quotes or whitespace
273
+ name = name.strip("\" '")
274
+
275
+ # Headline from meta description
276
+ meta_desc = soup.find("meta", attrs={"name": "description"})
277
+ headline = meta_desc["content"] if meta_desc and meta_desc.get("content") else None
278
+
279
+ # Profile image from og:image
280
+ og_image = soup.find("meta", attrs={"property": "og:image"})
281
+ profile_image = og_image["content"] if og_image and og_image.get("content") else None
282
+
283
+ # Followers — look for text matching "N Followers" or "N followers"
284
+ followers_count = 0
285
+ followers_pattern = re.compile(r"([\d,]+)\s+[Ff]ollowers?")
286
+ for text_el in soup.find_all(string=followers_pattern):
287
+ m = followers_pattern.search(text_el)
288
+ if m:
289
+ followers_count = int(m.group(1).replace(",", ""))
290
+ break
291
+
292
+ return User.from_card(
293
+ {
294
+ "id": username,
295
+ "name": name or username,
296
+ "username": username,
297
+ "headline": headline,
298
+ "profile_image": profile_image,
299
+ "followers_count": followers_count,
300
+ }
301
+ )
@@ -0,0 +1,50 @@
1
+ """Domain-specific exception hierarchy for cli-web-producthunt."""
2
+
3
+
4
def _error_payload(code, exc, **extra):
    """Build the JSON-serialisable error dict shared by every exception."""
    return {"error": True, "code": code, "message": str(exc), **extra}


class AppError(Exception):
    """Base for all producthunt CLI errors."""

    def to_dict(self):
        """Return the error as a plain dict with a machine-readable code."""
        return _error_payload("UNKNOWN", self)


class AuthError(AppError):
    """Access was refused; ``recoverable`` records whether a retry may help."""

    def __init__(self, message: str, recoverable: bool = True):
        super().__init__(message)
        self.recoverable = recoverable

    def to_dict(self):
        return _error_payload("AUTH_EXPIRED", self)


class RateLimitError(AppError):
    """The remote side throttled us; ``retry_after`` is in seconds or None."""

    def __init__(self, message: str, retry_after: float | None = None):
        super().__init__(message)
        self.retry_after = retry_after

    def to_dict(self):
        return _error_payload("RATE_LIMITED", self, retry_after=self.retry_after)


class NetworkError(AppError):
    """The request could not be completed at the transport level."""

    def to_dict(self):
        return _error_payload("NETWORK_ERROR", self)


class ServerError(AppError):
    """The server answered with an error status, kept in ``status_code``."""

    def __init__(self, message: str, status_code: int = 500):
        super().__init__(message)
        self.status_code = status_code

    def to_dict(self):
        return _error_payload("SERVER_ERROR", self)


class NotFoundError(AppError):
    """The requested page does not exist."""

    def to_dict(self):
        return _error_payload("NOT_FOUND", self)
@@ -0,0 +1,101 @@
1
+ """Data models for Product Hunt scraped responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass, field
7
+
8
+
9
+ @dataclass
10
+ class Post:
11
+ id: str
12
+ name: str
13
+ tagline: str
14
+ slug: str
15
+ url: str
16
+ description: str | None = None
17
+ votes_count: int = 0
18
+ comments_count: int = 0
19
+ topics: list[str] = field(default_factory=list)
20
+ thumbnail_url: str | None = None
21
+ rank: int | None = None
22
+
23
+ def to_dict(self) -> dict:
24
+ return {
25
+ "id": self.id,
26
+ "name": self.name,
27
+ "tagline": self.tagline,
28
+ "slug": self.slug,
29
+ "url": self.url,
30
+ "description": self.description,
31
+ "votes_count": self.votes_count,
32
+ "comments_count": self.comments_count,
33
+ "topics": self.topics,
34
+ "thumbnail_url": self.thumbnail_url,
35
+ "rank": self.rank,
36
+ }
37
+
38
+ @classmethod
39
+ def from_card(cls, card_data: dict) -> Post:
40
+ """Build a Post from scraped card data.
41
+
42
+ ``card_data`` keys: id, name, tagline, slug, votes_count,
43
+ comments_count, topics, thumbnail_url.
44
+ """
45
+ name = card_data.get("name", "")
46
+ # Extract rank from name prefix like "1. Stitch..."
47
+ rank = None
48
+ rank_match = re.match(r"^(\d+)\.\s+", name)
49
+ if rank_match:
50
+ rank = int(rank_match.group(1))
51
+ name = name[rank_match.end() :]
52
+
53
+ slug = card_data.get("slug", "")
54
+ return cls(
55
+ id=card_data.get("id", ""),
56
+ name=name,
57
+ tagline=card_data.get("tagline", ""),
58
+ slug=slug,
59
+ url=f"https://www.producthunt.com/products/{slug}" if slug else "",
60
+ description=card_data.get("description"),
61
+ votes_count=card_data.get("votes_count", 0),
62
+ comments_count=card_data.get("comments_count", 0),
63
+ topics=card_data.get("topics", []),
64
+ thumbnail_url=card_data.get("thumbnail_url"),
65
+ rank=rank,
66
+ )
67
+
68
+
69
+ @dataclass
70
+ class User:
71
+ id: str
72
+ name: str
73
+ username: str
74
+ headline: str | None = None
75
+ profile_image: str | None = None
76
+ website_url: str | None = None
77
+ followers_count: int = 0
78
+
79
+ def to_dict(self) -> dict:
80
+ return {
81
+ "id": self.id,
82
+ "name": self.name,
83
+ "username": self.username,
84
+ "headline": self.headline,
85
+ "profile_image": self.profile_image,
86
+ "website_url": self.website_url,
87
+ "followers_count": self.followers_count,
88
+ }
89
+
90
+ @classmethod
91
+ def from_card(cls, card_data: dict) -> User:
92
+ """Build a User from scraped profile data."""
93
+ return cls(
94
+ id=card_data.get("id", ""),
95
+ name=card_data.get("name", ""),
96
+ username=card_data.get("username", ""),
97
+ headline=card_data.get("headline"),
98
+ profile_image=card_data.get("profile_image"),
99
+ website_url=card_data.get("website_url"),
100
+ followers_count=card_data.get("followers_count", 0),
101
+ )