getred 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- getred/fetcher.py +39 -6
- getred/utils.py +1 -1
- {getred-0.1.4.dist-info → getred-0.1.5.dist-info}/METADATA +1 -1
- getred-0.1.5.dist-info/RECORD +12 -0
- getred-0.1.4.dist-info/RECORD +0 -12
- {getred-0.1.4.dist-info → getred-0.1.5.dist-info}/WHEEL +0 -0
- {getred-0.1.4.dist-info → getred-0.1.5.dist-info}/entry_points.txt +0 -0
- {getred-0.1.4.dist-info → getred-0.1.5.dist-info}/licenses/LICENSE +0 -0
getred/fetcher.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
"""HTTP client for fetching Reddit data."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
from urllib.parse import urlsplit, urlunsplit
|
|
7
|
+
|
|
3
8
|
import httpx
|
|
4
|
-
from typing import Dict, Any
|
|
5
9
|
|
|
6
10
|
|
|
7
11
|
class RedditFetcher:
|
|
@@ -10,11 +14,29 @@ class RedditFetcher:
|
|
|
10
14
|
USER_AGENT = "getred/0.1.0 (Reddit Thread Fetcher CLI)"
|
|
11
15
|
TIMEOUT = 30.0
|
|
12
16
|
|
|
13
|
-
def __init__(self):
|
|
17
|
+
def __init__(self, transport: Optional[httpx.BaseTransport] = None):
|
|
14
18
|
"""Initialize the fetcher with custom headers."""
|
|
15
19
|
self.headers = {
|
|
16
20
|
"User-Agent": self.USER_AGENT
|
|
17
21
|
}
|
|
22
|
+
self._transport = transport
|
|
23
|
+
|
|
24
|
+
@staticmethod
|
|
25
|
+
def _build_json_url(url: str) -> str:
|
|
26
|
+
"""
|
|
27
|
+
Construct a Reddit .json endpoint URL from a thread URL.
|
|
28
|
+
|
|
29
|
+
- Preserves query parameters
|
|
30
|
+
- Avoids double-appending .json
|
|
31
|
+
- Drops fragments
|
|
32
|
+
"""
|
|
33
|
+
parts = urlsplit(url)
|
|
34
|
+
|
|
35
|
+
path = parts.path or "/"
|
|
36
|
+
if not path.endswith(".json"):
|
|
37
|
+
path = path + ".json"
|
|
38
|
+
|
|
39
|
+
return urlunsplit((parts.scheme, parts.netloc, path, parts.query, ""))
|
|
18
40
|
|
|
19
41
|
def fetch_thread(self, url: str) -> Dict[str, Any]:
|
|
20
42
|
"""
|
|
@@ -29,10 +51,21 @@ class RedditFetcher:
|
|
|
29
51
|
Raises:
|
|
30
52
|
httpx.HTTPError: If request fails
|
|
31
53
|
"""
|
|
32
|
-
|
|
33
|
-
json_url = url.rstrip('/') + '.json'
|
|
54
|
+
json_url = self._build_json_url(url)
|
|
34
55
|
|
|
35
|
-
with httpx.Client(
|
|
56
|
+
with httpx.Client(
|
|
57
|
+
headers=self.headers,
|
|
58
|
+
timeout=self.TIMEOUT,
|
|
59
|
+
follow_redirects=True,
|
|
60
|
+
transport=self._transport,
|
|
61
|
+
) as client:
|
|
36
62
|
response = client.get(json_url)
|
|
37
63
|
response.raise_for_status()
|
|
38
|
-
|
|
64
|
+
try:
|
|
65
|
+
return response.json()
|
|
66
|
+
except ValueError as e:
|
|
67
|
+
content_type = response.headers.get("Content-Type", "<missing>")
|
|
68
|
+
raise ValueError(
|
|
69
|
+
f"Non-JSON response from Reddit endpoint "
|
|
70
|
+
f"(url={response.url!s}, status={response.status_code}, content_type={content_type})"
|
|
71
|
+
) from e
|
getred/utils.py
CHANGED
|
@@ -16,7 +16,7 @@ def validate_reddit_url(url: str) -> bool:
|
|
|
16
16
|
Returns:
|
|
17
17
|
True if valid Reddit thread URL, False otherwise
|
|
18
18
|
"""
|
|
19
|
-
pattern = r'^https?://(www\.)?reddit\.com/r/[^/]+/comments/[^/]
|
|
19
|
+
pattern = r'^https?://(www\.)?reddit\.com/r/[^/]+/comments/[^/]+(?:[/?#]|$)'
|
|
20
20
|
return bool(re.match(pattern, url))
|
|
21
21
|
|
|
22
22
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: getred
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: A CLI tool to fetch Reddit threads and save them as structured JSON
|
|
5
5
|
Project-URL: Homepage, https://github.com/mgelei/getred
|
|
6
6
|
Project-URL: Issues, https://github.com/mgelei/getred/issues
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
getred/__init__.py,sha256=OV8_4Tk9tyvGAfZ4flBb_clJWWhQzPyMNjbsmdo0YJc,198
|
|
2
|
+
getred/__main__.py,sha256=feAHoe3sKnTtTZZQ8CQntbtVBbkDL3EqaldR6LpLU48,108
|
|
3
|
+
getred/cli.py,sha256=ZcXE9yirkiwwmfohxSpsuaoC8lt_ubY4BgoPWYZQ_bI,2149
|
|
4
|
+
getred/fetcher.py,sha256=Md4g39rTAOaNL9EOCWfEaQJxqZEOkaQDpGtnvcFKobU,2120
|
|
5
|
+
getred/models.py,sha256=DJGHsXQJnKdgUSv_mXzsQd9luzsamw5UgkZT2WAZHBg,1613
|
|
6
|
+
getred/parser.py,sha256=hx_SHTZEcCmkfS1F2E8vlj5Z-v_xYRhJtcxiEFABJK4,3066
|
|
7
|
+
getred/utils.py,sha256=gcohLLJQNuu9cFpzcIpCCmUrjCWDFZL7E_EkZnP7Fag,2165
|
|
8
|
+
getred-0.1.5.dist-info/METADATA,sha256=OecYZHVM-lCagnjpHAKUlOqDt8LhvbiJTUjBcr5fpwk,729
|
|
9
|
+
getred-0.1.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
+
getred-0.1.5.dist-info/entry_points.txt,sha256=sUbiNDbmjeRZLW1zij_nhtxM9761F6DMmGeRl60xenY,43
|
|
11
|
+
getred-0.1.5.dist-info/licenses/LICENSE,sha256=GJ-Sk2Q9pSMeuVlqqZQe5P5DLvOjKQRVpTA1fy_JftI,1073
|
|
12
|
+
getred-0.1.5.dist-info/RECORD,,
|
getred-0.1.4.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
getred/__init__.py,sha256=OV8_4Tk9tyvGAfZ4flBb_clJWWhQzPyMNjbsmdo0YJc,198
|
|
2
|
-
getred/__main__.py,sha256=feAHoe3sKnTtTZZQ8CQntbtVBbkDL3EqaldR6LpLU48,108
|
|
3
|
-
getred/cli.py,sha256=ZcXE9yirkiwwmfohxSpsuaoC8lt_ubY4BgoPWYZQ_bI,2149
|
|
4
|
-
getred/fetcher.py,sha256=kdFb8lWAdQzEFGzGaSKI2W-YQyZyD5tclSDXC2j5o_o,1033
|
|
5
|
-
getred/models.py,sha256=DJGHsXQJnKdgUSv_mXzsQd9luzsamw5UgkZT2WAZHBg,1613
|
|
6
|
-
getred/parser.py,sha256=hx_SHTZEcCmkfS1F2E8vlj5Z-v_xYRhJtcxiEFABJK4,3066
|
|
7
|
-
getred/utils.py,sha256=z4mKfCbME6ffi9PC7CnMcYZNFEMtVDTF5JJKMoBgirg,2155
|
|
8
|
-
getred-0.1.4.dist-info/METADATA,sha256=HOdnBn4tKRk7buZokZeM99YvNvqD1CK4LVl_O5SXSFU,729
|
|
9
|
-
getred-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
-
getred-0.1.4.dist-info/entry_points.txt,sha256=sUbiNDbmjeRZLW1zij_nhtxM9761F6DMmGeRl60xenY,43
|
|
11
|
-
getred-0.1.4.dist-info/licenses/LICENSE,sha256=GJ-Sk2Q9pSMeuVlqqZQe5P5DLvOjKQRVpTA1fy_JftI,1073
|
|
12
|
-
getred-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|