cli-web-producthunt 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli_web_producthunt-0.1.0/PKG-INFO +12 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/README.md +52 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/__init__.py +0 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/__main__.py +4 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/commands/__init__.py +0 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/commands/posts.py +113 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/commands/users.py +33 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/core/__init__.py +0 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/core/client.py +301 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/core/exceptions.py +50 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/core/models.py +101 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/producthunt_cli.py +130 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/skills/SKILL.md +80 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/tests/TEST.md +129 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/tests/__init__.py +0 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/tests/test_core.py +395 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/tests/test_e2e.py +176 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/utils/__init__.py +0 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/utils/doctor.py +188 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/utils/helpers.py +39 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/utils/mcp_server.py +290 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/utils/output.py +40 -0
- cli_web_producthunt-0.1.0/cli_web/producthunt/utils/repl_skin.py +486 -0
- cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/PKG-INFO +12 -0
- cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/SOURCES.txt +29 -0
- cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/dependency_links.txt +1 -0
- cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/entry_points.txt +2 -0
- cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/requires.txt +4 -0
- cli_web_producthunt-0.1.0/cli_web_producthunt.egg-info/top_level.txt +1 -0
- cli_web_producthunt-0.1.0/setup.cfg +4 -0
- cli_web_producthunt-0.1.0/setup.py +16 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cli-web-producthunt
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI for Product Hunt — browse launches, leaderboards, and product details
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: click>=8.0
|
|
7
|
+
Requires-Dist: curl_cffi
|
|
8
|
+
Requires-Dist: beautifulsoup4
|
|
9
|
+
Requires-Dist: prompt_toolkit>=3.0
|
|
10
|
+
Dynamic: requires-dist
|
|
11
|
+
Dynamic: requires-python
|
|
12
|
+
Dynamic: summary
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# cli-web-producthunt
|
|
2
|
+
|
|
3
|
+
> Generated by [CLI-Anything-Web](../../../../cli-anything-web-plugin/) from [producthunt.com](https://www.producthunt.com)
|
|
4
|
+
|
|
5
|
+
CLI for browsing Product Hunt — today's top launches, leaderboards, product details, and user profiles.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
cd producthunt/agent-harness
|
|
11
|
+
pip install -e .
|
|
12
|
+
cli-web-producthunt --help
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Today's top products
|
|
19
|
+
cli-web-producthunt posts list --json
|
|
20
|
+
|
|
21
|
+
# Product detail
|
|
22
|
+
cli-web-producthunt posts get <slug> --json
|
|
23
|
+
|
|
24
|
+
# Leaderboard (--period weekly/monthly is accepted, but the client currently always scrapes the daily leaderboard)
|
|
25
|
+
cli-web-producthunt posts leaderboard --json
|
|
26
|
+
cli-web-producthunt posts leaderboard --period weekly --json
|
|
27
|
+
cli-web-producthunt posts leaderboard --date 2026-03-15 --json
|
|
28
|
+
|
|
29
|
+
# User profile
|
|
30
|
+
cli-web-producthunt users get rrhoover --json
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Auth
|
|
34
|
+
|
|
35
|
+
**No authentication required.** The CLI scrapes public HTML using `curl_cffi` with Chrome TLS impersonation to bypass Cloudflare.
|
|
36
|
+
|
|
37
|
+
## JSON Output
|
|
38
|
+
|
|
39
|
+
All commands support `--json` for structured output:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
cli-web-producthunt posts list --json | python -c "import sys,json; data=json.load(sys.stdin); print(f'{len(data)} products, top: {data[0][\"name\"]}')"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Testing
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
python -m pytest cli_web/producthunt/tests/ -v -s
|
|
49
|
+
|
|
50
|
+
# Subprocess tests
|
|
51
|
+
CLI_WEB_FORCE_INSTALLED=1 python -m pytest cli_web/producthunt/tests/ -v -s -k subprocess
|
|
52
|
+
```
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Posts commands for cli-web-producthunt."""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from ..core.client import ProductHuntClient
|
|
6
|
+
from ..utils.helpers import handle_errors
|
|
7
|
+
from ..utils.output import print_json, print_table
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Command group; the `list`, `get` and `leaderboard` subcommands below attach
# themselves to it via `@posts.command(...)`.
@click.group()
def posts():
    """Browse Product Hunt posts and leaderboard."""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@posts.command("list")
@click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
def list_posts(use_json):
    """List today's posts from the Product Hunt homepage."""
    # Everything runs inside handle_errors so failures are reported in the
    # output mode (JSON or plain text) the caller asked for.
    with handle_errors(json_mode=use_json):
        found = ProductHuntClient().list_posts()

        # JSON mode emits the results as-is, even when the list is empty.
        if use_json:
            print_json(found)
            return

        if not found:
            click.echo("No posts found.")
            return

        table = []
        for post in found:
            info = post.to_dict()
            slug = info.get("slug", "")
            title = info.get("name", "")
            votes = str(info.get("votes_count", ""))
            comments = str(info.get("comments_count", ""))
            # Taglines are clipped to 60 characters to keep the table narrow.
            blurb = info.get("tagline", "")[:60]
            table.append([slug, title, votes, comments, blurb])

        print_table(table, ["Slug", "Name", "Votes", "Comments", "Tagline"])
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@posts.command("get")
@click.argument("slug")
@click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
def get_post(slug, use_json):
    """Get details for a specific product by slug."""
    with handle_errors(json_mode=use_json):
        product = ProductHuntClient().get_post(slug=slug)

        if use_json:
            print_json(product)
            return

        info = product.to_dict()

        # Fields that are always printed; labels are padded to a common width.
        always_shown = (
            ("Name:        ", "name"),
            ("Slug:        ", "slug"),
            ("Tagline:     ", "tagline"),
            ("Votes:       ", "votes_count"),
            ("Comments:    ", "comments_count"),
            ("URL:         ", "url"),
        )
        for label, key in always_shown:
            click.echo(f"{label}{info.get(key, '')}")

        # Optional fields are printed only when they carry a value.
        description = info.get("description")
        if description:
            click.echo(f"Description: {description}")
        topics = info.get("topics")
        if topics:
            click.echo(f"Topics:      {', '.join(topics)}")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@posts.command("leaderboard")
@click.option(
    "--period",
    type=click.Choice(["daily", "weekly", "monthly"], case_sensitive=False),
    default="daily",
    help="Leaderboard period (default: daily).",
)
@click.option("--date", "date_str", default=None, help="Date as YYYY-MM-DD (optional).")
@click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
def leaderboard(period, date_str, use_json):
    """Show the Product Hunt leaderboard."""
    year = month = day = None
    if date_str:
        # Function-scope import keeps this module's import block untouched.
        from datetime import date as _date

        # Validate up front: a malformed, partial or impossible date becomes a
        # regular click usage error instead of a raw ValueError traceback or a
        # silent fallback to today's leaderboard.
        try:
            year, month, day = (int(part) for part in date_str.split("-"))
            _date(year, month, day)  # rejects e.g. month 13 or Feb 30
        except ValueError as exc:
            raise click.BadParameter(
                f"{date_str!r} is not a valid YYYY-MM-DD date.",
                param_hint="--date",
            ) from exc

    with handle_errors(json_mode=use_json):
        client = ProductHuntClient()
        results = client.list_leaderboard(
            period=period.lower(), year=year, month=month, day=day
        )

        if use_json:
            print_json(results)
            return

        if not results:
            click.echo("No posts found on leaderboard.")
            return

        rows = []
        for position, post in enumerate(results, 1):
            info = post.to_dict()
            # Prefer the rank scraped from the page; fall back to list order.
            rank = info.get("rank") or position
            rows.append(
                [
                    str(rank),
                    info.get("name", ""),
                    str(info.get("votes_count", "")),
                    str(info.get("comments_count", "")),
                    info.get("tagline", "")[:50],
                ]
            )
        print_table(rows, ["#", "Name", "Votes", "Comments", "Tagline"])
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Users commands for cli-web-producthunt."""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from ..core.client import ProductHuntClient
|
|
6
|
+
from ..utils.helpers import handle_errors
|
|
7
|
+
from ..utils.output import print_json
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Command group; the `get` subcommand below attaches itself to it via
# `@users.command(...)`.
@click.group()
def users():
    """Look up Product Hunt users."""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@users.command("get")
@click.argument("username")
@click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
def get_user(username, use_json):
    """Get details for a user by username."""
    with handle_errors(json_mode=use_json):
        profile = ProductHuntClient().get_user(username=username)

        if use_json:
            print_json(profile)
            return

        info = profile.to_dict()

        # (label, key, fallback) for the lines that are always printed.
        always_shown = (
            ("Username:  ", "username", ""),
            ("Name:      ", "name", ""),
            ("Headline:  ", "headline", ""),
            ("Followers: ", "followers_count", 0),
        )
        for label, key, fallback in always_shown:
            click.echo(f"{label}{info.get(key, fallback)}")

        # The website line appears only when a URL is present.
        website = info.get("website_url")
        if website:
            click.echo(f"Website:   {website}")
|
|
File without changes
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""HTML-scraping client for Product Hunt using curl_cffi.
|
|
2
|
+
|
|
3
|
+
No API tokens or cookies required -- curl_cffi with Chrome TLS
|
|
4
|
+
impersonation bypasses Cloudflare protection automatically.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
from bs4 import BeautifulSoup
|
|
12
|
+
from curl_cffi import requests as curl_requests
|
|
13
|
+
|
|
14
|
+
from .exceptions import (
|
|
15
|
+
AuthError,
|
|
16
|
+
NetworkError,
|
|
17
|
+
NotFoundError,
|
|
18
|
+
RateLimitError,
|
|
19
|
+
ServerError,
|
|
20
|
+
)
|
|
21
|
+
from .models import Post, User
|
|
22
|
+
|
|
23
|
+
# Root of the public Product Hunt site; the client builds every URL it
# fetches from this prefix.
BASE_URL = "https://www.producthunt.com"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ProductHuntClient:
    """Scrape Product Hunt pages with Chrome TLS impersonation.

    The client owns one ``curl_cffi`` session for its lifetime. Use it as a
    context manager (or call :meth:`close`) to release the session.
    """

    def __init__(self) -> None:
        self._session = curl_requests.Session(impersonate="chrome131")

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self._session.close()

    def __enter__(self) -> ProductHuntClient:
        return self

    def __exit__(self, *exc) -> None:
        self.close()

    # ------------------------------------------------------------------
    # Low-level transport
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_retry_after(value: str | None) -> float | None:
        """Convert a ``Retry-After`` header value to a delay in seconds.

        The header may be a number of seconds or an HTTP-date. Returns
        ``None`` when the header is missing or cannot be interpreted, so a
        malformed header never masks the rate-limit error itself.
        """
        if not value:
            return None
        try:
            return float(value)
        except ValueError:
            pass  # not numeric; try the HTTP-date form below

        # Function-scope imports: only needed on this rare path.
        from datetime import datetime, timezone
        from email.utils import parsedate_to_datetime

        try:
            when = parsedate_to_datetime(value)
        except (TypeError, ValueError):
            return None
        if when.tzinfo is None:
            # Treat a zone-less date as UTC so the subtraction is well defined.
            when = when.replace(tzinfo=timezone.utc)
        return max(0.0, (when - datetime.now(timezone.utc)).total_seconds())

    def _get(self, url: str) -> BeautifulSoup:
        """Fetch *url* and return a parsed BeautifulSoup tree.

        Raises:
            NetworkError: the request itself failed.
            AuthError: HTTP 403 (reported as a Cloudflare block).
            NotFoundError: HTTP 404.
            RateLimitError: HTTP 429; carries ``retry_after`` when the
                ``Retry-After`` header could be parsed.
            ServerError: any other non-200 status.
        """
        try:
            resp = self._session.get(url, timeout=30)
        except Exception as exc:
            # Transport boundary: any failure here surfaces as NetworkError.
            raise NetworkError(f"Request failed: {exc}") from exc

        status = resp.status_code
        if status == 403:
            raise AuthError(
                "Blocked by Cloudflare (HTTP 403). Try again later.",
                recoverable=True,
            )
        if status == 404:
            raise NotFoundError(f"Page not found: {url}")
        if status == 429:
            raise RateLimitError(
                "Rate limited by Product Hunt",
                retry_after=self._parse_retry_after(resp.headers.get("Retry-After")),
            )
        if status >= 500:
            raise ServerError(f"Server error (HTTP {status})", status_code=status)
        if status != 200:
            raise ServerError(f"Unexpected HTTP {status}: {url}", status_code=status)

        return BeautifulSoup(resp.text, "html.parser")

    # ------------------------------------------------------------------
    # Shared card-parsing helper
    # ------------------------------------------------------------------

    @staticmethod
    def _slug_from_href(href: str) -> str:
        """Return the last path segment of *href*.

        Any query string, fragment and trailing slash are dropped first, so
        ``/products/foo?ref=home`` and ``/products/foo/`` both yield ``foo``.
        """
        path = href.split("?", 1)[0].split("#", 1)[0].rstrip("/")
        return path.rsplit("/", 1)[-1]

    @staticmethod
    def _parse_post_cards(soup: BeautifulSoup) -> list[Post]:
        """Extract Post objects from a page containing post-name-* cards."""
        posts: list[Post] = []
        post_names = soup.find_all(attrs={"data-test": re.compile(r"^post-name-")})

        for card in post_names:
            data_test = card.get("data-test", "")
            post_id = data_test.removeprefix("post-name-")

            # Name and slug from the <a> link inside the card
            link = card.find("a", href=True)
            if not link:
                continue
            name = link.get_text(strip=True)
            # href may be /posts/<slug> or /products/<slug>
            slug = ProductHuntClient._slug_from_href(link["href"])

            # Tagline from the next sibling element
            tagline_el = card.find_next_sibling()
            tagline = tagline_el.get_text(strip=True) if tagline_el else ""

            # Walk up to find the full card container (up to 8 levels).
            # NOTE(review): if no ancestor carries a "post-item*" data-test,
            # the container is simply the 8th ancestor (or the root) -- confirm
            # this matches the live markup.
            container = card
            for _ in range(8):
                if container.parent:
                    container = container.parent
                if container.get("data-test", "").startswith("post-item"):
                    break

            # Votes and comments from <button> elements with numeric text:
            # the first numeric button is read as comments, the second as votes.
            nums = [
                int(text)
                for text in (btn.get_text(strip=True) for btn in container.find_all("button"))
                if text.isdigit()
            ]
            comments_count = nums[0] if len(nums) >= 1 else 0
            votes_count = nums[1] if len(nums) >= 2 else 0

            # Topics from /topics/ links
            topic_links = [
                a.get_text(strip=True)
                for a in container.find_all("a", href=lambda h: h and "/topics/" in h)
            ]

            # Thumbnail from <img> in the container
            img = container.find("img", src=True)
            thumbnail_url = img["src"] if img else None

            posts.append(
                Post.from_card(
                    {
                        "id": post_id,
                        "name": name,
                        "tagline": tagline,
                        "slug": slug,
                        "votes_count": votes_count,
                        "comments_count": comments_count,
                        "topics": topic_links,
                        "thumbnail_url": thumbnail_url,
                    }
                )
            )

        return posts

    # ------------------------------------------------------------------
    # Posts
    # ------------------------------------------------------------------

    def list_posts(self) -> list[Post]:
        """Scrape the Product Hunt homepage for today's posts."""
        soup = self._get(BASE_URL)
        return self._parse_post_cards(soup)

    def get_post(self, slug: str) -> Post:
        """Scrape a single product detail page.

        The name comes from ``<title>``, the description (also used as the
        tagline) from the description meta tag, and the thumbnail from
        ``og:image``. Vote and comment counts are a best-effort read of the
        numeric ``<button>`` texts on the page.
        """
        url = f"{BASE_URL}/products/{slug}"
        soup = self._get(url)

        # Title from <title> tag (usually "Name - Product Hunt")
        title_tag = soup.find("title")
        title = title_tag.get_text(strip=True) if title_tag else slug
        # Clean up " - Product Hunt" or " | Product Hunt" suffix
        for sep in (" - Product Hunt", " | Product Hunt"):
            if title.endswith(sep):
                title = title[: -len(sep)]

        # Description from meta tag
        meta_desc = soup.find("meta", attrs={"name": "description"})
        description = meta_desc["content"] if meta_desc and meta_desc.get("content") else None

        # Thumbnail from og:image
        og_image = soup.find("meta", attrs={"property": "og:image"})
        thumbnail_url = og_image["content"] if og_image and og_image.get("content") else None

        # Try to extract votes/comments from the detail page
        votes_count = 0
        comments_count = 0
        nums = [
            int(text)
            for text in (btn.get_text(strip=True) for btn in soup.find_all("button"))
            if text.isdigit()
        ]
        if len(nums) >= 2:
            comments_count = nums[0]
            votes_count = nums[1]
        elif len(nums) == 1:
            # A single number is taken to be the vote count.
            votes_count = nums[0]

        # Topics from /topics/ links
        topics = [
            a.get_text(strip=True)
            for a in soup.find_all("a", href=lambda h: h and "/topics/" in h)
        ]

        return Post(
            id=slug,
            name=title,
            tagline=description or "",
            slug=slug,
            url=url,
            description=description,
            votes_count=votes_count,
            comments_count=comments_count,
            topics=topics,
            thumbnail_url=thumbnail_url,
        )

    # ------------------------------------------------------------------
    # Leaderboard
    # ------------------------------------------------------------------

    def list_leaderboard(
        self,
        period: str = "daily",
        year: int | None = None,
        month: int | None = None,
        day: int | None = None,
    ) -> list[Post]:
        """Scrape the Product Hunt daily leaderboard.

        The only URL pattern used is ``/leaderboard/daily/YYYY/M/D``.
        *period* is accepted for API compatibility but is not used: every
        call resolves to the daily leaderboard.

        When *year*, *month* and *day* are all given, that date is fetched;
        if any of them is ``None``, today's local date is used instead.
        """
        if year is not None and month is not None and day is not None:
            url = f"{BASE_URL}/leaderboard/daily/{year}/{month}/{day}"
        else:
            # Default to today
            from datetime import date as _date

            today = _date.today()
            url = f"{BASE_URL}/leaderboard/daily/{today.year}/{today.month}/{today.day}"

        soup = self._get(url)
        return self._parse_post_cards(soup)

    # ------------------------------------------------------------------
    # Users
    # ------------------------------------------------------------------

    def get_user(self, username: str) -> User:
        """Scrape a user's public profile page at ``/@<username>``."""
        url = f"{BASE_URL}/@{username}"
        soup = self._get(url)

        # Name: try og:title first (usually cleaner), then <title>
        og_title = soup.find("meta", attrs={"property": "og:title"})
        if og_title and og_title.get("content"):
            name = og_title["content"]
        else:
            title_tag = soup.find("title")
            name = title_tag.get_text(strip=True) if title_tag else ""

        # Clean suffixes like " - Product Hunt", "'s profile on Product Hunt"
        for suffix in (
            " - Product Hunt",
            " | Product Hunt",
            "'s profile on Product Hunt",
        ):
            if name.endswith(suffix):
                name = name[: -len(suffix)]
        # Strip "(@username)" if present
        paren = f"(@{username})"
        if paren in name:
            name = name.replace(paren, "").strip()
        # Strip leading/trailing quotes or whitespace
        name = name.strip("\" '")

        # Headline from meta description
        meta_desc = soup.find("meta", attrs={"name": "description"})
        headline = meta_desc["content"] if meta_desc and meta_desc.get("content") else None

        # Profile image from og:image
        og_image = soup.find("meta", attrs={"property": "og:image"})
        profile_image = og_image["content"] if og_image and og_image.get("content") else None

        # Followers: first text node matching "N Followers" / "N followers"
        followers_count = 0
        followers_pattern = re.compile(r"([\d,]+)\s+[Ff]ollowers?")
        for text_el in soup.find_all(string=followers_pattern):
            m = followers_pattern.search(text_el)
            if m:
                followers_count = int(m.group(1).replace(",", ""))
                break

        return User.from_card(
            {
                "id": username,
                "name": name or username,
                "username": username,
                "headline": headline,
                "profile_image": profile_image,
                "followers_count": followers_count,
            }
        )
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Domain-specific exception hierarchy for cli-web-producthunt."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AppError(Exception):
    """Root of the producthunt CLI exception hierarchy."""

    def to_dict(self):
        """Return the error as a plain dict with the generic ``UNKNOWN`` code."""
        return dict(error=True, code="UNKNOWN", message=str(self))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AuthError(AppError):
    """Access-denied error; ``recoverable`` records whether a retry may help."""

    def __init__(self, message: str, recoverable: bool = True):
        super().__init__(message)
        self.recoverable = recoverable

    def to_dict(self):
        """Return the error as a plain dict with the ``AUTH_EXPIRED`` code."""
        return dict(error=True, code="AUTH_EXPIRED", message=str(self))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RateLimitError(AppError):
    """Rate-limit error; ``retry_after`` is the suggested wait, if known."""

    def __init__(self, message: str, retry_after: float | None = None):
        super().__init__(message)
        self.retry_after = retry_after

    def to_dict(self):
        """Return the error as a plain dict, including ``retry_after``."""
        return dict(
            error=True,
            code="RATE_LIMITED",
            message=str(self),
            retry_after=self.retry_after,
        )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class NetworkError(AppError):
    """Error reported with the ``NETWORK_ERROR`` code."""

    def to_dict(self):
        """Return the error as a plain dict with the ``NETWORK_ERROR`` code."""
        return dict(error=True, code="NETWORK_ERROR", message=str(self))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ServerError(AppError):
    """Error for an unexpected HTTP status; the status is kept on ``status_code``."""

    def __init__(self, message: str, status_code: int = 500):
        super().__init__(message)
        self.status_code = status_code

    def to_dict(self):
        """Return the error as a plain dict with the ``SERVER_ERROR`` code."""
        return dict(error=True, code="SERVER_ERROR", message=str(self))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class NotFoundError(AppError):
    """Error reported with the ``NOT_FOUND`` code."""

    def to_dict(self):
        """Return the error as a plain dict with the ``NOT_FOUND`` code."""
        return dict(error=True, code="NOT_FOUND", message=str(self))
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Data models for Product Hunt scraped responses."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Post:
    """A Product Hunt post as scraped from a card or a product page."""

    id: str
    name: str
    tagline: str
    slug: str
    url: str
    description: str | None = None
    votes_count: int = 0
    comments_count: int = 0
    topics: list[str] = field(default_factory=list)
    thumbnail_url: str | None = None
    # Position parsed from a "N. " name prefix; None when no prefix was present.
    rank: int | None = None

    def to_dict(self) -> dict:
        """Return all fields as a plain dict, in declaration order."""
        return {
            "id": self.id,
            "name": self.name,
            "tagline": self.tagline,
            "slug": self.slug,
            "url": self.url,
            "description": self.description,
            "votes_count": self.votes_count,
            "comments_count": self.comments_count,
            "topics": self.topics,
            "thumbnail_url": self.thumbnail_url,
            "rank": self.rank,
        }

    @classmethod
    def from_card(cls, card_data: dict) -> Post:
        """Build a Post from scraped card data.

        ``card_data`` keys: id, name, tagline, slug, votes_count,
        comments_count, topics, thumbnail_url. Missing keys and explicit
        ``None`` values for name, tagline, slug, the counts and topics fall
        back to empty defaults. A leading ``"N. "`` in the name is removed
        and stored as ``rank``.
        """
        # `or ""` also covers an explicit None, which re.match would reject.
        name = card_data.get("name") or ""
        # Extract rank from name prefix like "1. Stitch..."
        rank = None
        rank_match = re.match(r"^(\d+)\.\s+", name)
        if rank_match:
            rank = int(rank_match.group(1))
            name = name[rank_match.end() :]

        slug = card_data.get("slug") or ""
        return cls(
            id=card_data.get("id", ""),
            name=name,
            tagline=card_data.get("tagline") or "",
            slug=slug,
            url=f"https://www.producthunt.com/products/{slug}" if slug else "",
            description=card_data.get("description"),
            votes_count=card_data.get("votes_count") or 0,
            comments_count=card_data.get("comments_count") or 0,
            # Copy so later changes to the caller's list do not leak in.
            topics=list(card_data.get("topics") or []),
            thumbnail_url=card_data.get("thumbnail_url"),
            rank=rank,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class User:
    """A Product Hunt user profile as scraped from the public profile page."""

    id: str
    name: str
    username: str
    headline: str | None = None
    profile_image: str | None = None
    website_url: str | None = None
    followers_count: int = 0

    def to_dict(self) -> dict:
        """Return all fields as a plain dict, in declaration order."""
        keys = (
            "id",
            "name",
            "username",
            "headline",
            "profile_image",
            "website_url",
            "followers_count",
        )
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_card(cls, card_data: dict) -> User:
        """Build a User from scraped profile data."""
        lookup = card_data.get
        # Identity fields default to "", optional ones to None, the count to 0.
        values = {key: lookup(key, "") for key in ("id", "name", "username")}
        for key in ("headline", "profile_image", "website_url"):
            values[key] = lookup(key)
        values["followers_count"] = lookup("followers_count", 0)
        return cls(**values)
|