getred 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
getred/__init__.py ADDED
@@ -0,0 +1,7 @@
1
"""getred - A CLI tool to fetch Reddit threads and save them as structured JSON."""

# Keep in sync with the distribution version (wheel METADATA says 0.1.3;
# this constant was left behind at 0.1.0).
__version__ = "0.1.3"

from getred.models import Thread, Comment

# Public API: the two data models plus the package version.
__all__ = ["Thread", "Comment", "__version__"]
getred/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry point for python -m getred."""
2
+
3
+ from getred.cli import main
4
+
5
# Allow `python -m getred` to behave exactly like the installed console script.
if __name__ == "__main__":
    main()
getred/cli.py ADDED
@@ -0,0 +1,77 @@
1
+ """Command-line interface for getred."""
2
+
3
+ import sys
4
+ import click
5
+ from pathlib import Path
6
+ from getred import __version__
7
+ from getred.fetcher import RedditFetcher
8
+ from getred.parser import parse_thread
9
+ from getred.utils import validate_reddit_url, get_default_output_path, save_json
10
+
11
+
12
@click.command()
@click.argument('url')
@click.option(
    '-o', '--output',
    type=click.Path(path_type=Path),
    help='Custom output path (default: ~/Downloads/<slug>.json)'
)
@click.option(
    '-p', '--pretty/--no-pretty',
    default=True,
    help='Pretty-print JSON (default: enabled)'
)
@click.option(
    '-q', '--quiet',
    is_flag=True,
    help='Suppress progress output'
)
@click.version_option(version=__version__, prog_name='getred')
def main(url: str, output: Path, pretty: bool, quiet: bool):
    """
    Fetch a Reddit thread and save it as structured JSON.

    URL should be a full Reddit thread URL like:
    https://www.reddit.com/r/python/comments/abc123/title/
    """
    # Fail fast on malformed URLs before doing any network I/O.
    if not validate_reddit_url(url):
        click.echo("Error: Invalid Reddit thread URL", err=True)
        click.echo("Expected format: https://www.reddit.com/r/SUBREDDIT/comments/ID/TITLE/", err=True)
        sys.exit(1)

    # `output` is None unless -o was given; fall back to ~/Downloads/<slug>.json.
    output_path = output if output else get_default_output_path(url)

    if not quiet:
        # Plain strings: these had spurious f-prefixes with no placeholders (F541).
        click.echo("Fetching thread from Reddit...")

    try:
        # Fetch the raw JSON payload for the thread.
        fetcher = RedditFetcher()
        json_data = fetcher.fetch_thread(url)

        if not quiet:
            click.echo("Parsing comments...")

        # Convert the raw API response into the structured Thread model.
        thread = parse_thread(json_data)

        if not quiet:
            click.echo(f"Found {thread.comment_count} comments (parsed {len(thread.comments)} top-level)")

        # Persist the structured data to disk.
        save_json(thread.to_dict(), output_path, pretty=pretty)

        if not quiet:
            click.echo(f"✓ Saved to: {output_path}")
        else:
            # Quiet mode still prints the path so scripts can capture it.
            click.echo(str(output_path))

    except Exception as e:
        # Broad catch is deliberate at the CLI boundary: report and exit non-zero
        # instead of dumping a traceback at the user.
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)
74
+
75
+
76
# Support running this module directly (`python getred/cli.py`) as well as
# via the console-script entry point.
if __name__ == "__main__":
    main()
getred/fetcher.py ADDED
@@ -0,0 +1,38 @@
1
+ """HTTP client for fetching Reddit data."""
2
+
3
+ import httpx
4
+ from typing import Dict, Any
5
+
6
+
7
class RedditFetcher:
    """Fetches Reddit thread data using the public .json endpoint."""

    # Descriptive User-Agent; version updated to match the released package
    # (was stale at 0.1.0 while the distribution is 0.1.3).
    USER_AGENT = "getred/0.1.3 (Reddit Thread Fetcher CLI)"
    # Request timeout in seconds.
    TIMEOUT = 30.0

    def __init__(self):
        """Initialize the fetcher with custom headers."""
        self.headers = {
            "User-Agent": self.USER_AGENT
        }

    def fetch_thread(self, url: str) -> Any:
        """
        Fetch a Reddit thread as JSON.

        Args:
            url: Reddit thread URL (will be converted to the .json endpoint)

        Returns:
            Parsed JSON response. NOTE: for thread pages Reddit returns a
            two-element *list* (post listing, comments listing) — the parser
            indexes it as json_data[0]/json_data[1] — so the previous
            Dict[str, Any] annotation was incorrect.

        Raises:
            httpx.HTTPError: If the request fails or returns an error status
        """
        # Strip any trailing slash, then hit Reddit's JSON mirror of the page.
        # NOTE(review): assumes `url` carries no query string — URLs are
        # validated upstream by the CLI; confirm if this class gains other callers.
        json_url = url.rstrip('/') + '.json'

        with httpx.Client(headers=self.headers, timeout=self.TIMEOUT) as client:
            response = client.get(json_url)
            response.raise_for_status()
            return response.json()
getred/models.py ADDED
@@ -0,0 +1,62 @@
1
+ """Data models for Reddit threads and comments."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Optional, Dict, Any
5
+
6
+
7
@dataclass
class Comment:
    """One Reddit comment, including its nested reply tree."""

    id: str
    author: str
    body: str
    score: int
    created_utc: str
    depth: int
    replies: List['Comment'] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict; replies are converted recursively."""
        # Scalar fields are copied verbatim, preserving the established key order.
        payload = {
            key: getattr(self, key)
            for key in ("id", "author", "body", "score", "created_utc", "depth")
        }
        payload["replies"] = [child.to_dict() for child in self.replies]
        return payload
30
+
31
+
32
@dataclass
class Thread:
    """A Reddit submission together with its parsed comment tree."""

    id: str
    title: str
    author: str
    subreddit: str
    url: str
    selftext: str
    score: int
    created_utc: str
    fetched_at: str
    comment_count: int
    comments: List[Comment] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict; comments are converted recursively."""
        # Scalar fields are copied verbatim, preserving the established key order.
        payload = {
            key: getattr(self, key)
            for key in (
                "id", "title", "author", "subreddit", "url", "selftext",
                "score", "created_utc", "fetched_at", "comment_count",
            )
        }
        payload["comments"] = [child.to_dict() for child in self.comments]
        return payload
getred/parser.py ADDED
@@ -0,0 +1,99 @@
1
+ """Parser for Reddit JSON responses."""
2
+
3
+ from datetime import datetime
4
+ from typing import Dict, Any, List
5
+ from getred.models import Thread, Comment
6
+
7
+
8
def parse_timestamp(timestamp: float) -> str:
    """Convert a Unix timestamp to an ISO-8601 UTC string ending in 'Z'.

    Args:
        timestamp: Seconds since the epoch (Reddit's ``created_utc``).

    Returns:
        E.g. ``"2021-01-01T00:00:00Z"``.
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; use an
    # aware conversion and strip tzinfo so the output format is unchanged
    # (naive isoformat has no "+00:00", so appending 'Z' stays correct).
    from datetime import timezone  # local import: file top only imports datetime
    dt = datetime.fromtimestamp(timestamp, tz=timezone.utc).replace(tzinfo=None)
    return dt.isoformat() + 'Z'
11
+
12
+
13
def parse_comment(comment_data: Dict[str, Any], depth: int = 0) -> Comment:
    """
    Build a Comment (with nested replies) from a raw Reddit API node.

    Args:
        comment_data: One 'children' entry from a Reddit comments listing
        depth: Nesting level of this comment (0 for top-level)

    Returns:
        Comment object with its reply tree attached
    """
    payload = comment_data.get('data', {})

    node = Comment(
        id=payload.get('id', ''),
        # Deleted/removed comments may lack author/body in the payload.
        author=payload.get('author', '[deleted]'),
        body=payload.get('body', '[deleted]'),
        score=payload.get('score', 0),
        created_utc=parse_timestamp(payload.get('created_utc', 0)),
        depth=depth,
        replies=[],
    )

    # 'replies' is only parsed when it is a non-empty listing dict; any other
    # value (presumably "" when there are no replies — per the original
    # isinstance guard) means no nested comments.
    raw_replies = payload.get('replies')
    if raw_replies and isinstance(raw_replies, dict):
        node.replies = [
            parse_comment(child, depth + 1)
            for child in raw_replies.get('data', {}).get('children', [])
            # Only real comments (kind 't1'); "more" placeholders are skipped.
            if child.get('kind') == 't1'
        ]

    return node
50
+
51
+
52
def parse_comments(comments_listing: List[Dict[str, Any]]) -> List[Comment]:
    """
    Parse all top-level comments from a thread's comments listing.

    Entries whose kind is not 't1' (e.g. "more" placeholders for
    unexpanded comment batches) are ignored.

    Args:
        comments_listing: List of comment objects from Reddit API

    Returns:
        List of Comment objects
    """
    return [
        parse_comment(entry, depth=0)
        for entry in comments_listing
        if entry.get('kind') == 't1'
    ]
69
+
70
+
71
def parse_thread(json_data: List[Dict[str, Any]]) -> Thread:
    """
    Build a Thread from Reddit's two-element JSON response.

    The thread endpoint returns [post_listing, comments_listing]; the post
    payload lives at json_data[0].data.children[0].data.

    Args:
        json_data: Raw JSON response from Reddit API (list with 2 elements)

    Returns:
        Thread object with all data and nested comments
    """
    post = json_data[0]['data']['children'][0]['data']
    children = json_data[1]['data']['children']

    return Thread(
        id=post.get('id', ''),
        title=post.get('title', ''),
        author=post.get('author', '[deleted]'),
        subreddit=post.get('subreddit', ''),
        url=post.get('url', ''),
        selftext=post.get('selftext', ''),
        score=post.get('score', 0),
        created_utc=parse_timestamp(post.get('created_utc', 0)),
        # Record when we fetched, in the same 'Z'-suffixed ISO format.
        fetched_at=datetime.utcnow().isoformat() + 'Z',
        comment_count=post.get('num_comments', 0),
        comments=parse_comments(children),
    )
getred/utils.py ADDED
@@ -0,0 +1,83 @@
1
+ """Utility functions for URL validation, slug generation, and file operations."""
2
+
3
+ import json
4
+ import re
5
+ from pathlib import Path
6
+ from typing import Dict, Any
7
+
8
+
9
def validate_reddit_url(url: str) -> bool:
    """
    Validate that a URL points at a Reddit thread.

    Accepts www/old/no subdomain and URLs with or without the trailing
    title segment, e.g.:
        https://www.reddit.com/r/python/comments/abc123/some_title/
        https://old.reddit.com/r/python/comments/abc123

    Args:
        url: URL to validate

    Returns:
        True if valid Reddit thread URL, False otherwise
    """
    # The previous pattern required a "/" after the thread ID, rejecting
    # valid URLs like .../comments/abc123, and did not accept old.reddit.com.
    # Allow end-of-string after the ID and the old. subdomain.
    pattern = r'^https?://(www\.|old\.)?reddit\.com/r/[^/]+/comments/[^/]+(/|$)'
    return bool(re.match(pattern, url))
21
+
22
+
23
def generate_slug(url: str) -> str:
    """
    Derive a filename slug from a Reddit thread URL.

    Example: https://reddit.com/r/python/comments/abc123/cool_title/
    yields "abc123_cool_title". Falls back to the bare thread ID, and to
    "reddit_thread" when no ID can be found at all.

    Args:
        url: Reddit thread URL

    Returns:
        Slug string suitable for filename
    """
    # Try ID + title first, then bare ID; the first pattern that hits wins.
    for pattern, template in (
        (r'/comments/([^/]+)/([^/]+)', "{0}_{1}"),
        (r'/comments/([^/]+)', "{0}"),
    ):
        found = re.search(pattern, url)
        if found:
            return template.format(*found.groups())

    return "reddit_thread"
51
+
52
+
53
def get_default_output_path(url: str) -> Path:
    """
    Build the default output path: ~/Downloads/<slug>.json.

    Args:
        url: Reddit thread URL

    Returns:
        Path object for output file
    """
    filename = f"{generate_slug(url)}.json"
    return Path.home() / "Downloads" / filename
66
+
67
+
68
+ def save_json(data: Dict[str, Any], output_path: Path, pretty: bool = True) -> None:
69
+ """
70
+ Save data as JSON file.
71
+
72
+ Args:
73
+ data: Dictionary to save
74
+ output_path: Path where to save the file
75
+ pretty: Whether to pretty-print the JSON (default: True)
76
+ """
77
+ output_path.parent.mkdir(parents=True, exist_ok=True)
78
+
79
+ with open(output_path, 'w', encoding='utf-8') as f:
80
+ if pretty:
81
+ json.dump(data, f, indent=2, ensure_ascii=False)
82
+ else:
83
+ json.dump(data, f, ensure_ascii=False)
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: getred
3
+ Version: 0.1.3
4
+ Summary: A CLI tool to fetch Reddit threads and save them as structured JSON
5
+ Project-URL: Homepage, https://github.com/mgelei/getred
6
+ Project-URL: Issues, https://github.com/mgelei/getred/issues
7
+ Author-email: Mate Gelei-Szego <hello@mategelei.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Requires-Python: >=3.8
14
+ Requires-Dist: click>=8.0.0
15
+ Requires-Dist: httpx>=0.24.0
16
+ Provides-Extra: test
17
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
18
+ Description-Content-Type: text/markdown
19
+
20
+ # getred
21
+ Fetches a Reddit thread and saves it as structured JSON
@@ -0,0 +1,12 @@
1
+ getred/__init__.py,sha256=OV8_4Tk9tyvGAfZ4flBb_clJWWhQzPyMNjbsmdo0YJc,198
2
+ getred/__main__.py,sha256=feAHoe3sKnTtTZZQ8CQntbtVBbkDL3EqaldR6LpLU48,108
3
+ getred/cli.py,sha256=ZcXE9yirkiwwmfohxSpsuaoC8lt_ubY4BgoPWYZQ_bI,2149
4
+ getred/fetcher.py,sha256=kdFb8lWAdQzEFGzGaSKI2W-YQyZyD5tclSDXC2j5o_o,1033
5
+ getred/models.py,sha256=DJGHsXQJnKdgUSv_mXzsQd9luzsamw5UgkZT2WAZHBg,1613
6
+ getred/parser.py,sha256=hx_SHTZEcCmkfS1F2E8vlj5Z-v_xYRhJtcxiEFABJK4,3066
7
+ getred/utils.py,sha256=z4mKfCbME6ffi9PC7CnMcYZNFEMtVDTF5JJKMoBgirg,2155
8
+ getred-0.1.3.dist-info/METADATA,sha256=1KoRMD9X42ZYdC0xSJtG-SMxk6Z6t7YaLE67enUdsoY,729
9
+ getred-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
10
+ getred-0.1.3.dist-info/entry_points.txt,sha256=sUbiNDbmjeRZLW1zij_nhtxM9761F6DMmGeRl60xenY,43
11
+ getred-0.1.3.dist-info/licenses/LICENSE,sha256=GJ-Sk2Q9pSMeuVlqqZQe5P5DLvOjKQRVpTA1fy_JftI,1073
12
+ getred-0.1.3.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ getred = getred.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mate Gelei-Szego
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.