getred 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- getred-0.1.6/AGENTS.md +31 -0
- {getred-0.1.4 → getred-0.1.6}/PKG-INFO +1 -1
- {getred-0.1.4 → getred-0.1.6}/pyproject.toml +1 -1
- getred-0.1.6/src/getred/fetcher.py +71 -0
- {getred-0.1.4 → getred-0.1.6}/src/getred/utils.py +1 -1
- getred-0.1.6/tests/test_fetcher.py +115 -0
- {getred-0.1.4 → getred-0.1.6}/tests/test_utils.py +1 -0
- getred-0.1.4/src/getred/fetcher.py +0 -38
- {getred-0.1.4 → getred-0.1.6}/.claude/agents/pytest-runner.md +0 -0
- {getred-0.1.4 → getred-0.1.6}/.github/workflows/publish.yml +0 -0
- {getred-0.1.4 → getred-0.1.6}/.github/workflows/test-pr.yml +0 -0
- {getred-0.1.4 → getred-0.1.6}/.github/workflows/version-check.yml +0 -0
- {getred-0.1.4 → getred-0.1.6}/.gitignore +0 -0
- {getred-0.1.4 → getred-0.1.6}/CLAUDE.md +0 -0
- {getred-0.1.4 → getred-0.1.6}/LICENSE +0 -0
- {getred-0.1.4 → getred-0.1.6}/README.md +0 -0
- {getred-0.1.4 → getred-0.1.6}/src/getred/__init__.py +0 -0
- {getred-0.1.4 → getred-0.1.6}/src/getred/__main__.py +0 -0
- {getred-0.1.4 → getred-0.1.6}/src/getred/cli.py +0 -0
- {getred-0.1.4 → getred-0.1.6}/src/getred/models.py +0 -0
- {getred-0.1.4 → getred-0.1.6}/src/getred/parser.py +0 -0
- {getred-0.1.4 → getred-0.1.6}/tests/__init__.py +0 -0
- {getred-0.1.4 → getred-0.1.6}/tests/conftest.py +0 -0
- {getred-0.1.4 → getred-0.1.6}/tests/test_models.py +0 -0
- {getred-0.1.4 → getred-0.1.6}/tests/test_parser.py +0 -0
getred-0.1.6/AGENTS.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Repository Guidelines
|
|
2
|
+
|
|
3
|
+
## Project Structure & Module Organization
|
|
4
|
+
- `src/getred/`: Python package (CLI + fetch/parse/save pipeline)
|
|
5
|
+
- `cli.py` (Click entrypoint), `fetcher.py` (httpx client), `parser.py`, `models.py`, `utils.py`
|
|
6
|
+
- `tests/`: pytest suite and fixtures (`tests/conftest.py`)
|
|
7
|
+
- `pyproject.toml`: packaging (hatchling), dependencies, and pytest config
|
|
8
|
+
- `.github/workflows/`: CI (PR tests, version bump check, publish)
|
|
9
|
+
|
|
10
|
+
## Build, Test, and Development Commands
|
|
11
|
+
- Create a venv: `python -m venv .venv && source .venv/bin/activate`
|
|
12
|
+
- Install (editable + tests): `python -m pip install -U pip && python -m pip install -e ".[test]"`
|
|
13
|
+
- Run locally: `getred "<thread_url>"` or `python -m getred "<thread_url>"`
|
|
14
|
+
- Run tests: `pytest`
|
|
15
|
+
- Build artifacts (sdist/wheel): `python -m pip install build && python -m build`
|
|
16
|
+
|
|
17
|
+
## Coding Style & Naming Conventions
|
|
18
|
+
- Python 3.12+ (CI uses 3.12); keep code compatible with the declared minimum.
|
|
19
|
+
- 4-space indentation, PEP 8, and small focused functions.
|
|
20
|
+
- Naming: `snake_case` (functions/vars), `PascalCase` (classes), `UPPER_SNAKE_CASE` (constants).
|
|
21
|
+
- Prefer `pathlib.Path` for file paths and keep dependencies minimal (edit `pyproject.toml`).
|
|
22
|
+
|
|
23
|
+
## Testing Guidelines
|
|
24
|
+
- Use pytest; keep tests in `tests/` and name files `test_*.py`.
|
|
25
|
+
- Prefer deterministic tests: avoid live Reddit calls (mock httpx or use fixtures).
|
|
26
|
+
- Add/adjust tests alongside behavior changes (parser edge-cases, URL validation, output shape).
|
|
27
|
+
|
|
28
|
+
## Commit & Pull Request Guidelines
|
|
29
|
+
- Commit messages follow a Conventional Commits-style prefix used in this repo: `feat: ...`, `fix: ...`, `docs: ...`, `chore: ...`, `cicd: ...`.
|
|
30
|
+
- PRs should include: what changed, why, how to test (`pytest`), and any user-facing CLI/output changes.
|
|
31
|
+
- Versioning: PRs to `master` must bump `version = "..."` in `pyproject.toml` (CI enforces this); keep other version strings (e.g. `src/getred/__init__.py#__version__`) consistent when releasing.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: getred
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: A CLI tool to fetch Reddit threads and save them as structured JSON
|
|
5
5
|
Project-URL: Homepage, https://github.com/mgelei/getred
|
|
6
6
|
Project-URL: Issues, https://github.com/mgelei/getred/issues
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""HTTP client for fetching Reddit data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
from urllib.parse import urlsplit, urlunsplit
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RedditFetcher:
|
|
12
|
+
"""Fetches Reddit thread data using the public JSON API."""
|
|
13
|
+
|
|
14
|
+
USER_AGENT = "getred/0.1.0 (Reddit Thread Fetcher CLI)"
|
|
15
|
+
TIMEOUT = 30.0
|
|
16
|
+
|
|
17
|
+
def __init__(self, transport: Optional[httpx.BaseTransport] = None):
|
|
18
|
+
"""Initialize the fetcher with custom headers."""
|
|
19
|
+
self.headers = {
|
|
20
|
+
"User-Agent": self.USER_AGENT
|
|
21
|
+
}
|
|
22
|
+
self._transport = transport
|
|
23
|
+
|
|
24
|
+
@staticmethod
|
|
25
|
+
def _build_json_url(url: str) -> str:
|
|
26
|
+
"""
|
|
27
|
+
Construct a Reddit .json endpoint URL from a thread URL.
|
|
28
|
+
|
|
29
|
+
- Preserves query parameters
|
|
30
|
+
- Avoids double-appending .json
|
|
31
|
+
- Drops fragments
|
|
32
|
+
"""
|
|
33
|
+
parts = urlsplit(url)
|
|
34
|
+
|
|
35
|
+
path = parts.path or "/"
|
|
36
|
+
if not path.endswith(".json"):
|
|
37
|
+
path = path + ".json"
|
|
38
|
+
|
|
39
|
+
return urlunsplit((parts.scheme, parts.netloc, path, parts.query, ""))
|
|
40
|
+
|
|
41
|
+
def fetch_thread(self, url: str) -> Dict[str, Any]:
|
|
42
|
+
"""
|
|
43
|
+
Fetch a Reddit thread as JSON.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
url: Reddit thread URL (will be converted to JSON endpoint)
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
Dict containing Reddit API response
|
|
50
|
+
|
|
51
|
+
Raises:
|
|
52
|
+
httpx.HTTPError: If request fails
|
|
53
|
+
"""
|
|
54
|
+
json_url = self._build_json_url(url)
|
|
55
|
+
|
|
56
|
+
with httpx.Client(
|
|
57
|
+
headers=self.headers,
|
|
58
|
+
timeout=self.TIMEOUT,
|
|
59
|
+
follow_redirects=True,
|
|
60
|
+
transport=self._transport,
|
|
61
|
+
) as client:
|
|
62
|
+
response = client.get(json_url)
|
|
63
|
+
response.raise_for_status()
|
|
64
|
+
try:
|
|
65
|
+
return response.json()
|
|
66
|
+
except ValueError as e:
|
|
67
|
+
content_type = response.headers.get("Content-Type", "<missing>")
|
|
68
|
+
raise ValueError(
|
|
69
|
+
f"Non-JSON response from Reddit endpoint "
|
|
70
|
+
f"(url={response.url!s}, status={response.status_code}, content_type={content_type})"
|
|
71
|
+
) from e
|
|
@@ -16,7 +16,7 @@ def validate_reddit_url(url: str) -> bool:
|
|
|
16
16
|
Returns:
|
|
17
17
|
True if valid Reddit thread URL, False otherwise
|
|
18
18
|
"""
|
|
19
|
-
pattern = r'^https?://(www\.)?reddit\.com/r/[^/]+/comments/[^/]
|
|
19
|
+
pattern = r'^https?://(www\.)?reddit\.com/r/[^/]+/comments/[^/]+(?:[/?#]|$)'
|
|
20
20
|
return bool(re.match(pattern, url))
|
|
21
21
|
|
|
22
22
|
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Tests for RedditFetcher URL handling and redirect safety."""
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from getred.fetcher import RedditFetcher
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_fetch_thread_preserves_query_params():
|
|
10
|
+
seen_urls: list[str] = []
|
|
11
|
+
|
|
12
|
+
def handler(request: httpx.Request) -> httpx.Response:
|
|
13
|
+
seen_urls.append(str(request.url))
|
|
14
|
+
return httpx.Response(200, json=[{"ok": True}], request=request)
|
|
15
|
+
|
|
16
|
+
fetcher = RedditFetcher(transport=httpx.MockTransport(handler))
|
|
17
|
+
|
|
18
|
+
url = "https://www.reddit.com/r/python/comments/abc123/cool_title/?sort=top"
|
|
19
|
+
data = fetcher.fetch_thread(url)
|
|
20
|
+
|
|
21
|
+
assert data == [{"ok": True}]
|
|
22
|
+
assert seen_urls == [
|
|
23
|
+
"https://www.reddit.com/r/python/comments/abc123/cool_title/.json?sort=top"
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_fetch_thread_does_not_double_append_json():
|
|
28
|
+
seen_urls: list[str] = []
|
|
29
|
+
|
|
30
|
+
def handler(request: httpx.Request) -> httpx.Response:
|
|
31
|
+
seen_urls.append(str(request.url))
|
|
32
|
+
return httpx.Response(200, json={"ok": True}, request=request)
|
|
33
|
+
|
|
34
|
+
fetcher = RedditFetcher(transport=httpx.MockTransport(handler))
|
|
35
|
+
|
|
36
|
+
url = "https://www.reddit.com/r/python/comments/abc123/cool_title/.json?sort=top"
|
|
37
|
+
data = fetcher.fetch_thread(url)
|
|
38
|
+
|
|
39
|
+
assert data == {"ok": True}
|
|
40
|
+
assert seen_urls == [url]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.mark.parametrize(
|
|
44
|
+
("input_url", "expected_json_url"),
|
|
45
|
+
[
|
|
46
|
+
(
|
|
47
|
+
"https://www.reddit.com/r/python/comments/abc123/cool_title/",
|
|
48
|
+
"https://www.reddit.com/r/python/comments/abc123/cool_title/.json",
|
|
49
|
+
),
|
|
50
|
+
(
|
|
51
|
+
"https://www.reddit.com/r/python/comments/abc123/cool_title",
|
|
52
|
+
"https://www.reddit.com/r/python/comments/abc123/cool_title.json",
|
|
53
|
+
),
|
|
54
|
+
],
|
|
55
|
+
)
|
|
56
|
+
def test_fetch_thread_trailing_slash_variants(input_url: str, expected_json_url: str):
|
|
57
|
+
seen_urls: list[str] = []
|
|
58
|
+
|
|
59
|
+
def handler(request: httpx.Request) -> httpx.Response:
|
|
60
|
+
seen_urls.append(str(request.url))
|
|
61
|
+
return httpx.Response(200, json={"ok": True}, request=request)
|
|
62
|
+
|
|
63
|
+
fetcher = RedditFetcher(transport=httpx.MockTransport(handler))
|
|
64
|
+
assert fetcher.fetch_thread(input_url) == {"ok": True}
|
|
65
|
+
assert seen_urls == [expected_json_url]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_fetch_thread_follows_redirects():
|
|
69
|
+
seen_urls: list[str] = []
|
|
70
|
+
|
|
71
|
+
redirected_to = (
|
|
72
|
+
"https://www.reddit.com/r/python/comments/abc123/cool_title/.json?sort=top"
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
def handler(request: httpx.Request) -> httpx.Response:
|
|
76
|
+
seen_urls.append(str(request.url))
|
|
77
|
+
if len(seen_urls) == 1:
|
|
78
|
+
return httpx.Response(
|
|
79
|
+
302,
|
|
80
|
+
headers={"Location": redirected_to},
|
|
81
|
+
request=request,
|
|
82
|
+
)
|
|
83
|
+
return httpx.Response(200, json={"ok": True}, request=request)
|
|
84
|
+
|
|
85
|
+
fetcher = RedditFetcher(transport=httpx.MockTransport(handler))
|
|
86
|
+
|
|
87
|
+
url = "https://reddit.com/r/python/comments/abc123/cool_title/?sort=top"
|
|
88
|
+
data = fetcher.fetch_thread(url)
|
|
89
|
+
|
|
90
|
+
assert data == {"ok": True}
|
|
91
|
+
assert seen_urls == [
|
|
92
|
+
"https://reddit.com/r/python/comments/abc123/cool_title/.json?sort=top",
|
|
93
|
+
redirected_to,
|
|
94
|
+
]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_fetch_thread_non_json_body_raises_clear_error():
|
|
98
|
+
def handler(request: httpx.Request) -> httpx.Response:
|
|
99
|
+
return httpx.Response(
|
|
100
|
+
200,
|
|
101
|
+
headers={"Content-Type": "text/html"},
|
|
102
|
+
content=b"<html>not json</html>",
|
|
103
|
+
request=request,
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
fetcher = RedditFetcher(transport=httpx.MockTransport(handler))
|
|
107
|
+
|
|
108
|
+
url = "https://www.reddit.com/r/python/comments/abc123/cool_title/"
|
|
109
|
+
with pytest.raises(ValueError) as excinfo:
|
|
110
|
+
fetcher.fetch_thread(url)
|
|
111
|
+
|
|
112
|
+
message = str(excinfo.value)
|
|
113
|
+
assert "Non-JSON response" in message
|
|
114
|
+
assert "content_type=text/html" in message
|
|
115
|
+
|
|
@@ -14,6 +14,7 @@ class TestValidateRedditUrl:
|
|
|
14
14
|
"https://www.reddit.com/r/python/comments/abc123/cool_title/",
|
|
15
15
|
"http://reddit.com/r/AskReddit/comments/xyz789/interesting_question/",
|
|
16
16
|
"https://reddit.com/r/programming/comments/test123/test/extra/path/",
|
|
17
|
+
"https://reddit.com/r/python/comments/abc123/cool_title",
|
|
17
18
|
]
|
|
18
19
|
for url in valid_urls:
|
|
19
20
|
assert validate_reddit_url(url), f"Expected {url} to be valid"
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
"""HTTP client for fetching Reddit data."""
|
|
2
|
-
|
|
3
|
-
import httpx
|
|
4
|
-
from typing import Dict, Any
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class RedditFetcher:
|
|
8
|
-
"""Fetches Reddit thread data using the public JSON API."""
|
|
9
|
-
|
|
10
|
-
USER_AGENT = "getred/0.1.0 (Reddit Thread Fetcher CLI)"
|
|
11
|
-
TIMEOUT = 30.0
|
|
12
|
-
|
|
13
|
-
def __init__(self):
|
|
14
|
-
"""Initialize the fetcher with custom headers."""
|
|
15
|
-
self.headers = {
|
|
16
|
-
"User-Agent": self.USER_AGENT
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
def fetch_thread(self, url: str) -> Dict[str, Any]:
|
|
20
|
-
"""
|
|
21
|
-
Fetch a Reddit thread as JSON.
|
|
22
|
-
|
|
23
|
-
Args:
|
|
24
|
-
url: Reddit thread URL (will be converted to JSON endpoint)
|
|
25
|
-
|
|
26
|
-
Returns:
|
|
27
|
-
Dict containing Reddit API response
|
|
28
|
-
|
|
29
|
-
Raises:
|
|
30
|
-
httpx.HTTPError: If request fails
|
|
31
|
-
"""
|
|
32
|
-
# Ensure URL ends with .json
|
|
33
|
-
json_url = url.rstrip('/') + '.json'
|
|
34
|
-
|
|
35
|
-
with httpx.Client(headers=self.headers, timeout=self.TIMEOUT) as client:
|
|
36
|
-
response = client.get(json_url)
|
|
37
|
-
response.raise_for_status()
|
|
38
|
-
return response.json()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|