getred 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- getred/__init__.py +7 -0
- getred/__main__.py +6 -0
- getred/cli.py +77 -0
- getred/fetcher.py +38 -0
- getred/models.py +62 -0
- getred/parser.py +99 -0
- getred/utils.py +83 -0
- getred-0.1.3.dist-info/METADATA +21 -0
- getred-0.1.3.dist-info/RECORD +12 -0
- getred-0.1.3.dist-info/WHEEL +4 -0
- getred-0.1.3.dist-info/entry_points.txt +2 -0
- getred-0.1.3.dist-info/licenses/LICENSE +21 -0
getred/__init__.py
ADDED
getred/__main__.py
ADDED
getred/cli.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Command-line interface for getred."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import click
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from getred import __version__
|
|
7
|
+
from getred.fetcher import RedditFetcher
|
|
8
|
+
from getred.parser import parse_thread
|
|
9
|
+
from getred.utils import validate_reddit_url, get_default_output_path, save_json
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
@click.argument('url')
@click.option(
    '-o', '--output',
    type=click.Path(path_type=Path),
    help='Custom output path (default: ~/Downloads/<slug>.json)'
)
@click.option(
    '-p', '--pretty/--no-pretty',
    default=True,
    help='Pretty-print JSON (default: enabled)'
)
@click.option(
    '-q', '--quiet',
    is_flag=True,
    help='Suppress progress output'
)
@click.version_option(version=__version__, prog_name='getred')
def main(url: str, output: Path, pretty: bool, quiet: bool):
    """
    Fetch a Reddit thread and save it as structured JSON.

    URL should be a full Reddit thread URL like:
    https://www.reddit.com/r/python/comments/abc123/title/
    """
    # Reject anything that is not a Reddit thread URL up front so we
    # never hit the network with a bogus request.
    if not validate_reddit_url(url):
        click.echo("Error: Invalid Reddit thread URL", err=True)
        click.echo("Expected format: https://www.reddit.com/r/SUBREDDIT/comments/ID/TITLE/", err=True)
        sys.exit(1)

    # `output` is None when -o/--output was not given; fall back to
    # ~/Downloads/<slug>.json derived from the URL.
    output_path = output if output else get_default_output_path(url)

    if not quiet:
        # Plain strings here — no interpolation needed (previously
        # these were f-strings with no placeholders).
        click.echo("Fetching thread from Reddit...")

    try:
        # Fetch the raw thread JSON from Reddit's public endpoint.
        fetcher = RedditFetcher()
        json_data = fetcher.fetch_thread(url)

        if not quiet:
            click.echo("Parsing comments...")

        # Convert the raw API response into the structured Thread model.
        thread = parse_thread(json_data)

        if not quiet:
            click.echo(f"Found {thread.comment_count} comments (parsed {len(thread.comments)} top-level)")

        # Persist to disk (parent directories are created as needed).
        save_json(thread.to_dict(), output_path, pretty=pretty)

        if not quiet:
            click.echo(f"✓ Saved to: {output_path}")
        else:
            # Quiet mode prints only the output path so the command
            # composes cleanly in shell pipelines.
            click.echo(str(output_path))

    except Exception as e:
        # Top-level CLI boundary: report any failure (network, parse,
        # filesystem) as one error message and a non-zero exit code.
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)


if __name__ == '__main__':
    main()
|
getred/fetcher.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""HTTP client for fetching Reddit data."""
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
from typing import Dict, Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RedditFetcher:
    """Fetches Reddit thread data using the public JSON API."""

    # Custom User-Agent sent with every request.
    USER_AGENT = "getred/0.1.0 (Reddit Thread Fetcher CLI)"
    TIMEOUT = 30.0

    def __init__(self):
        """Initialize the fetcher with custom headers."""
        self.headers = {"User-Agent": self.USER_AGENT}

    def fetch_thread(self, url: str) -> Dict[str, Any]:
        """
        Fetch a Reddit thread as JSON.

        Args:
            url: Reddit thread URL (will be converted to JSON endpoint)

        Returns:
            Dict containing Reddit API response

        Raises:
            httpx.HTTPError: If request fails
        """
        # Reddit serves a JSON representation of any thread at <url>.json;
        # strip a trailing slash first so we don't build '...//.json'.
        endpoint = url.rstrip('/') + '.json'

        with httpx.Client(headers=self.headers, timeout=self.TIMEOUT) as http:
            reply = http.get(endpoint)
            reply.raise_for_status()
            return reply.json()
|
getred/models.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Data models for Reddit threads and comments."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import List, Optional, Dict, Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class Comment:
    """Represents a Reddit comment."""

    id: str
    author: str
    body: str
    score: int
    created_utc: str
    depth: int
    replies: List['Comment'] = field(default_factory=list)

    # Scalar attributes serialized verbatim by to_dict, in output order.
    _SCALAR_FIELDS = ("id", "author", "body", "score", "created_utc", "depth")

    def to_dict(self) -> Dict[str, Any]:
        """Convert this comment (and its reply tree, recursively) to dicts."""
        serialized: Dict[str, Any] = {
            name: getattr(self, name) for name in self._SCALAR_FIELDS
        }
        serialized["replies"] = [child.to_dict() for child in self.replies]
        return serialized
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class Thread:
    """Represents a Reddit thread."""

    id: str
    title: str
    author: str
    subreddit: str
    url: str
    selftext: str
    score: int
    created_utc: str
    fetched_at: str
    comment_count: int
    comments: List[Comment] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the thread plus its full comment tree to plain dicts."""
        # Scalar attributes copied verbatim, in output key order.
        scalar_fields = (
            "id", "title", "author", "subreddit", "url", "selftext",
            "score", "created_utc", "fetched_at", "comment_count",
        )
        payload: Dict[str, Any] = {
            name: getattr(self, name) for name in scalar_fields
        }
        payload["comments"] = [c.to_dict() for c in self.comments]
        return payload
|
getred/parser.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Parser for Reddit JSON responses."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Dict, Any, List
|
|
5
|
+
from getred.models import Thread, Comment
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def parse_timestamp(timestamp: float) -> str:
    """Convert Unix timestamp to ISO format string (UTC, trailing 'Z').

    Args:
        timestamp: Seconds since the Unix epoch.

    Returns:
        Naive ISO-8601 string, e.g. '1970-01-01T00:00:00Z'.
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12.
    # Build an aware UTC datetime instead, then drop the tzinfo so the
    # output stays byte-identical ('...Z', not '...+00:00').
    from datetime import timezone
    dt = datetime.fromtimestamp(timestamp, tz=timezone.utc)
    return dt.replace(tzinfo=None).isoformat() + 'Z'
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def parse_comment(comment_data: Dict[str, Any], depth: int = 0) -> Comment:
    """
    Parse a comment from Reddit JSON data.

    Args:
        comment_data: Raw comment data from Reddit API
        depth: Nesting depth of the comment

    Returns:
        Comment object with nested replies
    """
    data = comment_data.get('data', {})

    # Deleted/removed comments lack 'author'/'body'; fall back to the
    # conventional '[deleted]' marker.
    node = Comment(
        id=data.get('id', ''),
        author=data.get('author', '[deleted]'),
        body=data.get('body', '[deleted]'),
        score=data.get('score', 0),
        created_utc=parse_timestamp(data.get('created_utc', 0)),
        depth=depth,
        replies=[],
    )

    # 'replies' may be a listing dict, or a falsy placeholder (e.g. "")
    # when there are none — only recurse into a real, non-empty dict.
    replies_data = data.get('replies')
    if isinstance(replies_data, dict) and replies_data:
        children = replies_data.get('data', {}).get('children', [])
        # Only kind 't1' entries are comments; 'more' stubs are skipped.
        node.replies = [
            parse_comment(child, depth + 1)
            for child in children
            if child.get('kind') == 't1'
        ]

    return node
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def parse_comments(comments_listing: List[Dict[str, Any]]) -> List[Comment]:
    """
    Parse all top-level comments from the comments listing.

    Args:
        comments_listing: List of comment objects from Reddit API

    Returns:
        List of Comment objects
    """
    # Only entries of kind 't1' are real comments; 'more' placeholder
    # objects (pagination stubs) are dropped.
    return [
        parse_comment(entry, depth=0)
        for entry in comments_listing
        if entry.get('kind') == 't1'
    ]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def parse_thread(json_data: List[Dict[str, Any]]) -> Thread:
    """
    Parse a Reddit thread from JSON response.

    Args:
        json_data: Raw JSON response from Reddit API (list with 2 elements)

    Returns:
        Thread object with all data and nested comments
    """
    from datetime import timezone

    # Reddit API returns [post_data, comments_data]
    post_listing = json_data[0]['data']['children'][0]['data']
    comments_listing = json_data[1]['data']['children']

    # datetime.utcnow() is deprecated since Python 3.12; take an aware
    # UTC time and strip tzinfo so the '...Z' format stays identical.
    fetched_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'

    return Thread(
        id=post_listing.get('id', ''),
        title=post_listing.get('title', ''),
        author=post_listing.get('author', '[deleted]'),
        subreddit=post_listing.get('subreddit', ''),
        url=post_listing.get('url', ''),
        selftext=post_listing.get('selftext', ''),
        score=post_listing.get('score', 0),
        created_utc=parse_timestamp(post_listing.get('created_utc', 0)),
        fetched_at=fetched_at,
        comment_count=post_listing.get('num_comments', 0),
        comments=parse_comments(comments_listing)
    )
|
getred/utils.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Utility functions for URL validation, slug generation, and file operations."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def validate_reddit_url(url: str) -> bool:
    """
    Validate that a URL is a Reddit thread URL.

    Args:
        url: URL to validate

    Returns:
        True if valid Reddit thread URL, False otherwise
    """
    # The previous pattern required a trailing slash after the thread ID,
    # which rejected valid URLs such as
    # https://www.reddit.com/r/python/comments/abc123
    # Accept either a '/' after the ID or end-of-string (strict superset
    # of the old behavior).
    pattern = r'^https?://(www\.)?reddit\.com/r/[^/]+/comments/[^/]+(/|$)'
    return bool(re.match(pattern, url))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def generate_slug(url: str) -> str:
    """
    Generate a filename slug from a Reddit URL.

    Extracts the thread ID and title from the URL.
    Example: https://reddit.com/r/python/comments/abc123/cool_title/
    Returns: abc123_cool_title

    Args:
        url: Reddit thread URL

    Returns:
        Slug string suitable for filename
    """
    # One pattern covers both cases: the title segment after the thread
    # ID is optional, so a single search replaces two sequential ones.
    match = re.search(r'/comments/([^/]+)(?:/([^/]+))?', url)
    if not match:
        # URL has no /comments/ segment at all.
        return "reddit_thread"

    thread_id = match.group(1)
    title_slug = match.group(2)
    return f"{thread_id}_{title_slug}" if title_slug else thread_id
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_default_output_path(url: str) -> Path:
    """
    Generate default output path in ~/Downloads.

    Args:
        url: Reddit thread URL

    Returns:
        Path object for output file
    """
    # <home>/Downloads/<slug>.json, where the slug is derived from the URL.
    return Path.home() / "Downloads" / (generate_slug(url) + ".json")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def save_json(data: Dict[str, Any], output_path: Path, pretty: bool = True) -> None:
    """
    Save data as JSON file.

    Args:
        data: Dictionary to save
        output_path: Path where to save the file
        pretty: Whether to pretty-print the JSON (default: True)
    """
    # Make sure the destination directory exists before writing.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # indent=None yields json.dump's compact default, so one call
    # covers both the pretty and non-pretty cases.
    indent = 2 if pretty else None
    with output_path.open('w', encoding='utf-8') as handle:
        json.dump(data, handle, indent=indent, ensure_ascii=False)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: getred
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: A CLI tool to fetch Reddit threads and save them as structured JSON
|
|
5
|
+
Project-URL: Homepage, https://github.com/mgelei/getred
|
|
6
|
+
Project-URL: Issues, https://github.com/mgelei/getred/issues
|
|
7
|
+
Author-email: Mate Gelei-Szego <hello@mategelei.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Requires-Dist: click>=8.0.0
|
|
15
|
+
Requires-Dist: httpx>=0.24.0
|
|
16
|
+
Provides-Extra: test
|
|
17
|
+
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# getred
|
|
21
|
+
Fetches a Reddit thread and saves it as structured JSON
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
getred/__init__.py,sha256=OV8_4Tk9tyvGAfZ4flBb_clJWWhQzPyMNjbsmdo0YJc,198
|
|
2
|
+
getred/__main__.py,sha256=feAHoe3sKnTtTZZQ8CQntbtVBbkDL3EqaldR6LpLU48,108
|
|
3
|
+
getred/cli.py,sha256=ZcXE9yirkiwwmfohxSpsuaoC8lt_ubY4BgoPWYZQ_bI,2149
|
|
4
|
+
getred/fetcher.py,sha256=kdFb8lWAdQzEFGzGaSKI2W-YQyZyD5tclSDXC2j5o_o,1033
|
|
5
|
+
getred/models.py,sha256=DJGHsXQJnKdgUSv_mXzsQd9luzsamw5UgkZT2WAZHBg,1613
|
|
6
|
+
getred/parser.py,sha256=hx_SHTZEcCmkfS1F2E8vlj5Z-v_xYRhJtcxiEFABJK4,3066
|
|
7
|
+
getred/utils.py,sha256=z4mKfCbME6ffi9PC7CnMcYZNFEMtVDTF5JJKMoBgirg,2155
|
|
8
|
+
getred-0.1.3.dist-info/METADATA,sha256=1KoRMD9X42ZYdC0xSJtG-SMxk6Z6t7YaLE67enUdsoY,729
|
|
9
|
+
getred-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
+
getred-0.1.3.dist-info/entry_points.txt,sha256=sUbiNDbmjeRZLW1zij_nhtxM9761F6DMmGeRl60xenY,43
|
|
11
|
+
getred-0.1.3.dist-info/licenses/LICENSE,sha256=GJ-Sk2Q9pSMeuVlqqZQe5P5DLvOjKQRVpTA1fy_JftI,1073
|
|
12
|
+
getred-0.1.3.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mate Gelei-Szego
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|