getred 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: getred
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: A CLI tool to fetch Reddit threads and save them as structured JSON
5
5
  Project-URL: Homepage, https://github.com/mgelei/getred
6
6
  Project-URL: Issues, https://github.com/mgelei/getred/issues
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "getred"
7
- version = "0.1.4"
7
+ version = "0.1.5"
8
8
  description = "A CLI tool to fetch Reddit threads and save them as structured JSON"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -0,0 +1,71 @@
1
+ """HTTP client for fetching Reddit data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+ from urllib.parse import urlsplit, urlunsplit
7
+
8
+ import httpx
9
+
10
+
11
class RedditFetcher:
    """Fetch Reddit thread data through the public ``.json`` endpoints.

    A thread URL is rewritten to its ``.json`` counterpart and requested with
    a short-lived ``httpx.Client`` that sends a custom ``User-Agent`` header
    and follows redirects.
    """

    # NOTE(review): the version embedded in this string is hard-coded rather
    # than derived from the package metadata - confirm whether it should
    # track releases.
    USER_AGENT = "getred/0.1.0 (Reddit Thread Fetcher CLI)"
    # Timeout value handed to httpx.Client for every request.
    TIMEOUT = 30.0

    def __init__(self, transport: Optional[httpx.BaseTransport] = None):
        """Initialize the fetcher with custom headers.

        Args:
            transport: Optional httpx transport forwarded to the client
                (for example ``httpx.MockTransport``). ``None`` is passed
                through unchanged.
        """
        self.headers = {"User-Agent": self.USER_AGENT}
        self._transport = transport

    @staticmethod
    def _build_json_url(url: str) -> str:
        """Construct a Reddit ``.json`` endpoint URL from a thread URL.

        - Preserves query parameters
        - Avoids double-appending ``.json``, including when the existing
          suffix is followed by trailing slashes
        - Drops fragments

        Args:
            url: Thread URL, with or without a trailing slash or ``.json``.

        Returns:
            The URL with ``.json`` as the last path component suffix.
        """
        parts = urlsplit(url)
        path = parts.path or "/"

        # Compare against the slash-trimmed path so ".../thread.json/" is
        # recognised as already pointing at the JSON endpoint instead of
        # turning into ".../thread.json/.json".
        trimmed = path.rstrip("/")
        if trimmed.endswith(".json"):
            path = trimmed
        else:
            # A trailing slash is kept on purpose: ".../title/" becomes
            # ".../title/.json".
            path += ".json"

        # The empty last element discards any fragment.
        return urlunsplit((parts.scheme, parts.netloc, path, parts.query, ""))

    def fetch_thread(self, url: str) -> Any:
        """Fetch a Reddit thread as JSON.

        Args:
            url: Reddit thread URL (will be converted to JSON endpoint)

        Returns:
            The decoded JSON body, returned unchanged. Its top-level type is
            whatever the endpoint sends (a list or a dict), so no container
            type is promised here.

        Raises:
            httpx.HTTPError: If the request fails or the final response has
                an error status.
            ValueError: If the response body cannot be decoded as JSON.
        """
        json_url = self._build_json_url(url)

        with httpx.Client(
            headers=self.headers,
            timeout=self.TIMEOUT,
            follow_redirects=True,
            transport=self._transport,
        ) as client:
            response = client.get(json_url)
            response.raise_for_status()
            try:
                return response.json()
            except ValueError as e:
                # Re-raise with request context so the caller can see what
                # was actually returned instead of a bare decode error.
                content_type = response.headers.get("Content-Type", "<missing>")
                raise ValueError(
                    f"Non-JSON response from Reddit endpoint "
                    f"(url={response.url!s}, status={response.status_code}, content_type={content_type})"
                ) from e
@@ -16,7 +16,7 @@ def validate_reddit_url(url: str) -> bool:
16
16
  Returns:
17
17
  True if valid Reddit thread URL, False otherwise
18
18
  """
19
- pattern = r'^https?://(www\.)?reddit\.com/r/[^/]+/comments/[^/]+/'
19
+ pattern = r'^https?://(www\.)?reddit\.com/r/[^/]+/comments/[^/]+(?:[/?#]|$)'
20
20
  return bool(re.match(pattern, url))
21
21
 
22
22
 
@@ -0,0 +1,115 @@
1
+ """Tests for RedditFetcher URL handling and redirect safety."""
2
+
3
+ import httpx
4
+ import pytest
5
+
6
+ from getred.fetcher import RedditFetcher
7
+
8
+
9
def test_fetch_thread_preserves_query_params():
    """The query string must survive the rewrite to the ``.json`` endpoint."""
    requested: list[str] = []

    def record(request: httpx.Request) -> httpx.Response:
        # Remember every URL the client asks for, then answer with a list payload.
        requested.append(str(request.url))
        return httpx.Response(200, json=[{"ok": True}], request=request)

    thread_url = "https://www.reddit.com/r/python/comments/abc123/cool_title/?sort=top"
    client = RedditFetcher(transport=httpx.MockTransport(record))

    payload = client.fetch_thread(thread_url)

    assert payload == [{"ok": True}]
    expected_requests = [
        "https://www.reddit.com/r/python/comments/abc123/cool_title/.json?sort=top"
    ]
    assert requested == expected_requests
25
+
26
+
27
def test_fetch_thread_does_not_double_append_json():
    """A URL that already targets ``.json`` must be requested unchanged."""
    requested: list[str] = []

    def record(request: httpx.Request) -> httpx.Response:
        # Capture the outgoing URL and reply with a minimal JSON object.
        requested.append(str(request.url))
        return httpx.Response(200, json={"ok": True}, request=request)

    url = "https://www.reddit.com/r/python/comments/abc123/cool_title/.json?sort=top"
    client = RedditFetcher(transport=httpx.MockTransport(record))

    payload = client.fetch_thread(url)

    assert payload == {"ok": True}
    assert requested == [url]
41
+
42
+
43
@pytest.mark.parametrize(
    ("input_url", "expected_json_url"),
    [
        (
            "https://www.reddit.com/r/python/comments/abc123/cool_title/",
            "https://www.reddit.com/r/python/comments/abc123/cool_title/.json",
        ),
        (
            "https://www.reddit.com/r/python/comments/abc123/cool_title",
            "https://www.reddit.com/r/python/comments/abc123/cool_title.json",
        ),
    ],
)
def test_fetch_thread_trailing_slash_variants(input_url: str, expected_json_url: str):
    """Thread URLs with and without a trailing slash map to the expected endpoint."""
    requested: list[str] = []

    def record(request: httpx.Request) -> httpx.Response:
        # Log the requested URL so the rewrite can be checked afterwards.
        requested.append(str(request.url))
        return httpx.Response(200, json={"ok": True}, request=request)

    client = RedditFetcher(transport=httpx.MockTransport(record))
    payload = client.fetch_thread(input_url)

    assert payload == {"ok": True}
    assert requested == [expected_json_url]
66
+
67
+
68
def test_fetch_thread_follows_redirects():
    """A 302 from the first request is followed and the final JSON is returned."""
    requested: list[str] = []
    redirected_to = (
        "https://www.reddit.com/r/python/comments/abc123/cool_title/.json?sort=top"
    )

    def redirect_once(request: httpx.Request) -> httpx.Response:
        requested.append(str(request.url))
        # Every request after the first gets the real payload; only the
        # very first one is answered with a redirect.
        if len(requested) > 1:
            return httpx.Response(200, json={"ok": True}, request=request)
        return httpx.Response(
            302,
            headers={"Location": redirected_to},
            request=request,
        )

    url = "https://reddit.com/r/python/comments/abc123/cool_title/?sort=top"
    client = RedditFetcher(transport=httpx.MockTransport(redirect_once))

    payload = client.fetch_thread(url)

    assert payload == {"ok": True}
    first_request = "https://reddit.com/r/python/comments/abc123/cool_title/.json?sort=top"
    assert requested == [first_request, redirected_to]
95
+
96
+
97
def test_fetch_thread_non_json_body_raises_clear_error():
    """An HTML body served with status 200 surfaces as a descriptive ValueError."""

    def serve_html(request: httpx.Request) -> httpx.Response:
        # Successful status code, but the body is not JSON.
        return httpx.Response(
            200,
            headers={"Content-Type": "text/html"},
            content=b"<html>not json</html>",
            request=request,
        )

    url = "https://www.reddit.com/r/python/comments/abc123/cool_title/"
    client = RedditFetcher(transport=httpx.MockTransport(serve_html))

    with pytest.raises(ValueError) as excinfo:
        client.fetch_thread(url)

    message = str(excinfo.value)
    for fragment in ("Non-JSON response", "content_type=text/html"):
        assert fragment in message
115
+
@@ -14,6 +14,7 @@ class TestValidateRedditUrl:
14
14
  "https://www.reddit.com/r/python/comments/abc123/cool_title/",
15
15
  "http://reddit.com/r/AskReddit/comments/xyz789/interesting_question/",
16
16
  "https://reddit.com/r/programming/comments/test123/test/extra/path/",
17
+ "https://reddit.com/r/python/comments/abc123/cool_title",
17
18
  ]
18
19
  for url in valid_urls:
19
20
  assert validate_reddit_url(url), f"Expected {url} to be valid"
@@ -1,38 +0,0 @@
1
- """HTTP client for fetching Reddit data."""
2
-
3
- import httpx
4
- from typing import Dict, Any
5
-
6
-
7
class RedditFetcher:
    """Fetches Reddit thread data using the public JSON API."""

    USER_AGENT = "getred/0.1.0 (Reddit Thread Fetcher CLI)"
    # Timeout value handed to httpx.Client for every request.
    TIMEOUT = 30.0

    def __init__(self):
        """Initialize the fetcher with custom headers."""
        self.headers = {"User-Agent": self.USER_AGENT}

    @staticmethod
    def _build_json_url(url: str) -> str:
        """Turn a thread URL into its ``.json`` endpoint URL.

        The suffix is attached to the path portion only, so a query string
        is kept after it and a fragment is dropped. A path that already ends
        in ``.json`` is left alone instead of receiving a second suffix.

        Args:
            url: Thread URL, optionally with a query string or fragment.

        Returns:
            The URL with trailing slashes removed from the path and ``.json``
            appended when it was not already present.
        """
        # Fragments are never sent to the server; discard them first.
        without_fragment = url.partition("#")[0]
        # Separate the query so ".json" lands on the path, not after "?...".
        base, separator, query = without_fragment.partition("?")
        base = base.rstrip("/")
        if not base.endswith(".json"):
            base += ".json"
        return base + separator + query

    def fetch_thread(self, url: str) -> Any:
        """Fetch a Reddit thread as JSON.

        Args:
            url: Reddit thread URL (will be converted to JSON endpoint)

        Returns:
            The decoded JSON body, returned unchanged; its top-level type is
            whatever the endpoint sends.

        Raises:
            httpx.HTTPError: If request fails
        """
        json_url = self._build_json_url(url)

        with httpx.Client(headers=self.headers, timeout=self.TIMEOUT) as client:
            response = client.get(json_url)
            response.raise_for_status()
            return response.json()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes