getred 0.1.3 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- getred-0.1.3/.claude/agents/pytest-runner.md +89 -0
- getred-0.1.3/.github/workflows/publish.yml +56 -0
- getred-0.1.3/.github/workflows/version-check.yml +43 -0
- getred-0.1.3/.gitignore +207 -0
- getred-0.1.3/CLAUDE.md +65 -0
- getred-0.1.3/LICENSE +21 -0
- getred-0.1.3/PKG-INFO +21 -0
- getred-0.1.3/README.md +2 -0
- getred-0.1.3/pyproject.toml +41 -0
- getred-0.1.3/src/getred/__init__.py +7 -0
- getred-0.1.3/src/getred/__main__.py +6 -0
- getred-0.1.3/src/getred/cli.py +77 -0
- getred-0.1.3/src/getred/fetcher.py +38 -0
- getred-0.1.3/src/getred/models.py +62 -0
- getred-0.1.3/src/getred/parser.py +99 -0
- getred-0.1.3/src/getred/utils.py +83 -0
- getred-0.1.3/tests/__init__.py +1 -0
- getred-0.1.3/tests/conftest.py +105 -0
- getred-0.1.3/tests/test_models.py +108 -0
- getred-0.1.3/tests/test_parser.py +132 -0
- getred-0.1.3/tests/test_utils.py +56 -0
getred-0.1.3/.claude/agents/pytest-runner.md
ADDED
@@ -0,0 +1,89 @@
---
name: pytest-runner
description: "Use this agent when unit tests need to be executed via pytest."
tools: Bash, Glob, Grep, Read, WebFetch, WebSearch, Skill, TaskCreate, TaskGet, TaskUpdate, TaskList, ToolSearch
model: haiku
---

You are a specialized test execution agent with expertise in pytest and Python testing frameworks. Your sole responsibility is to run unit tests and provide clear, actionable results.

## Core Responsibilities

1. **Test Execution**: Run pytest with appropriate flags and configuration to execute the requested tests
2. **Results Analysis**: Parse test output to extract key metrics (pass/fail counts, percentages, duration)
3. **Failure Reporting**: Provide concise but sufficient detail on failing tests to enable quick debugging
4. **Output Formatting**: Present results in a clear, scannable format that highlights the most important information first

## Execution Protocol

1. **Determine Test Scope**:
   - If specific tests/modules are mentioned, run only those (e.g., `pytest tests/test_parser.py`)
   - If specific test functions are mentioned, target them (e.g., `pytest tests/test_utils.py::test_validate_url`)
   - If no specification is given, run the entire test suite (`pytest`)
   - Always run from the project root directory

2. **Use Appropriate Flags**:
   - Use `-v` for verbose output to get detailed test names
   - Use `--tb=short` for concise tracebacks
   - Use `-x` to stop at first failure only if explicitly requested
   - Use `--no-header` and `--no-summary` to reduce noise when needed

3. **Execute Tests**: Run pytest using the bash tool with the determined scope and flags. Very important: always use a virtual environment when working with Python.

4. **Parse Output**: Extract from pytest output:
   - Total number of tests run
   - Number passed, failed, skipped, errored
   - Pass percentage (calculate as: passed / total * 100)
   - Execution time
   - Names of failed tests
   - Key error messages or assertion failures

## Output Format

Present results in this structure:

**Test Results Summary**
- **Pass Rate**: X% (Y/Z tests passed)
- **Duration**: Xs

If all tests passed:
✅ All tests passed successfully!

If tests failed:
❌ **Failed Tests** (N failures):

1. `test_module::test_name`
   - Error: Brief description of failure
   - Location: file:line

2. `test_module::test_name`
   - Error: Brief description of failure
   - Location: file:line

[Include relevant traceback snippets only if they add clarity]

If tests were skipped:
ℹ️ Skipped: N tests

## Guidelines

- **Be Concise**: Focus on actionable information. Avoid repeating full tracebacks unless necessary for understanding.
- **Highlight Failures**: Failed tests are the priority. Make them immediately visible.
- **Calculate Accurately**: Always show pass percentage as a clear metric of test health.
- **Preserve Context**: Include enough error detail to understand what went wrong without needing to re-run tests.
- **Handle Edge Cases**:
  - If pytest isn't installed, clearly state this and suggest installation
  - If no tests are found, report this explicitly
  - If tests error during collection, report collection errors separately from test failures
- **No Interpretation**: Report facts from test execution. Don't speculate about why tests failed or suggest fixes unless explicitly asked.
- **Respect Verbosity**: If the user asks for detailed output, include full tracebacks and debugging information.

## Self-Verification

Before returning results:
1. Confirm pytest command executed successfully (even if tests failed)
2. Verify pass percentage calculation is correct
3. Check that all failed test names are captured
4. Ensure error messages are meaningful and not truncated mid-sentence

You are the definitive source for test execution results. Be accurate, concise, and action-oriented.
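A note on step 4 of the agent's protocol: pytest prints a one-line summary at the end of a run, and the pass rate the agent reports can be derived from it. Below is a minimal sketch, assuming a summary line of the usual `N failed, M passed ... in Xs` shape; real output varies with pytest version and plugins, so the regex here is illustrative only.

```python
import re

# Hypothetical pytest summary line; real output varies by version and plugins.
summary = "2 failed, 10 passed, 1 skipped in 0.34s"

# Collect each "<count> <category>" pair from the summary.
counts = {key: int(n) for n, key in re.findall(r"(\d+) (passed|failed|skipped|errors?)", summary)}
total = sum(counts.values())
pass_rate = counts.get("passed", 0) / total * 100 if total else 0.0
print(f"Pass Rate: {pass_rate:.0f}% ({counts.get('passed', 0)}/{total} tests passed)")
# Pass Rate: 77% (10/13 tests passed)
```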
getred-0.1.3/.github/workflows/publish.yml
ADDED
@@ -0,0 +1,56 @@
name: Publish to PyPI

on:
  push:
    branches:
      - master

jobs:
  test:
    name: Run tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[test]"

      - name: Run tests
        run: pytest

  build-and-publish:
    name: Build and publish to PyPI
    needs: test
    runs-on: ubuntu-latest

    permissions:
      id-token: write
      contents: read

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install build

      - name: Build package
        run: python -m build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
getred-0.1.3/.github/workflows/version-check.yml
ADDED
@@ -0,0 +1,43 @@
name: Version Bump Check

on:
  pull_request:
    branches: [master]

jobs:
  check-version:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Get PR branch version
        id: pr-version
        run: |
          PR_VERSION=$(grep -Po '(?<=^version = ")[^"]+' pyproject.toml)
          echo "version=$PR_VERSION" >> $GITHUB_OUTPUT
          echo "PR branch version: $PR_VERSION"

      - name: Get master branch version
        id: master-version
        run: |
          MASTER_VERSION=$(git show origin/master:pyproject.toml | grep -Po '(?<=^version = ")[^"]+')
          echo "version=$MASTER_VERSION" >> $GITHUB_OUTPUT
          echo "Master branch version: $MASTER_VERSION"

      - name: Compare versions
        run: |
          if [ "${{ steps.pr-version.outputs.version }}" == "${{ steps.master-version.outputs.version }}" ]; then
            echo "❌ Version bump required!"
            echo "Current version in PR: ${{ steps.pr-version.outputs.version }}"
            echo "Version in master: ${{ steps.master-version.outputs.version }}"
            echo ""
            echo "Please update the version in pyproject.toml before merging to master."
            exit 1
          else
            echo "✅ Version bumped successfully!"
            echo "Old version (master): ${{ steps.master-version.outputs.version }}"
            echo "New version (PR): ${{ steps.pr-version.outputs.version }}"
          fi
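The `grep -Po` extraction in this workflow assumes `version = "..."` starts a line in pyproject.toml, which holds for this package. A more robust equivalent, sketched here under the assumption of Python 3.11+ (where `tomllib` entered the standard library), would parse the TOML instead of pattern-matching it:

```python
# Sketch only: reads the version by parsing pyproject.toml.
# Assumes Python 3.11+ for the stdlib tomllib module.
import tomllib

with open("pyproject.toml", "rb") as f:
    version = tomllib.load(f)["project"]["version"]

print(version)  # "0.1.3" for this release
```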
getred-0.1.3/.gitignore
ADDED
@@ -0,0 +1,207 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
getred-0.1.3/CLAUDE.md
ADDED
@@ -0,0 +1,65 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

`getred` is a CLI tool that fetches Reddit threads and saves them as structured JSON files. It uses Reddit's public JSON API (no authentication required) to retrieve thread data including all nested comments.

## Development Commands

### Installation
```bash
# Create and activate virtual environment
python -m venv .venv
source .venv/bin/activate  # On Windows: .venv\Scripts\activate

# Install in development mode
pip install -e .
```

### Running the CLI
```bash
# After installation
getred <reddit_url>

# Or directly via Python module
python -m getred <reddit_url>

# Common options
getred <url> -o /path/to/output.json  # Custom output path
getred <url> --no-pretty              # Compact JSON output
getred <url> -q                       # Quiet mode
```

## Architecture

### Data Flow
1. **CLI Layer** (`cli.py`): Handles command-line arguments and orchestrates the fetch-parse-save pipeline
2. **Fetcher** (`fetcher.py`): HTTP client that requests Reddit's `.json` endpoint using httpx
3. **Parser** (`parser.py`): Converts Reddit's JSON response into structured `Thread` and `Comment` models
4. **Models** (`models.py`): Dataclass definitions with `to_dict()` methods for serialization
5. **Utils** (`utils.py`): URL validation, slug generation, and file I/O helpers

### Key Design Patterns

**Reddit API Response Structure**: The API returns a 2-element list:
- `json_data[0]`: Post/thread data (single item listing)
- `json_data[1]`: Comments data (listing with nested replies)

**Comment Nesting**: Comments are recursively parsed with `depth` tracking. Each comment has a `replies` list containing nested `Comment` objects. The parser handles Reddit's "more" objects (kind='more') by skipping them - these indicate additional comments not included in the initial response.

**Output Format**: By default, files are saved to `~/Downloads/<thread_id>_<title_slug>.json` with pretty-printed JSON. Thread metadata includes `fetched_at` timestamp in ISO format.

## Project Structure
- `src/getred/`: Main package
  - `models.py`: Thread and Comment dataclasses
  - `fetcher.py`: RedditFetcher class (httpx-based HTTP client)
  - `parser.py`: Recursive comment parsing logic
  - `cli.py`: Click-based CLI entry point
  - `utils.py`: URL validation, slug generation, file operations
  - `__main__.py`: Module entry point for `python -m getred`

## Custom instructions
- Always use a virtual environment when working with Python (`.venv`)
- Never run tests directly, always use the pytest-runner agent to do that
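The data flow CLAUDE.md describes maps one-to-one onto the package's public pieces, and the same fetch-parse-save pipeline can be driven outside the CLI. A minimal sketch using only names that appear in this release (the URL is a placeholder):

```python
from pathlib import Path

from getred.fetcher import RedditFetcher
from getred.parser import parse_thread
from getred.utils import save_json

url = "https://www.reddit.com/r/python/comments/abc123/title/"  # placeholder URL

json_data = RedditFetcher().fetch_thread(url)      # fetcher: GET the .json endpoint
thread = parse_thread(json_data)                   # parser: build Thread/Comment models
save_json(thread.to_dict(), Path("thread.json"))   # utils: serialize and write
```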
getred-0.1.3/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 Mate Gelei-Szego

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
getred-0.1.3/PKG-INFO
ADDED
@@ -0,0 +1,21 @@
Metadata-Version: 2.4
Name: getred
Version: 0.1.3
Summary: A CLI tool to fetch Reddit threads and save them as structured JSON
Project-URL: Homepage, https://github.com/mgelei/getred
Project-URL: Issues, https://github.com/mgelei/getred/issues
Author-email: Mate Gelei-Szego <hello@mategelei.com>
License: MIT
License-File: LICENSE
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Requires-Python: >=3.8
Requires-Dist: click>=8.0.0
Requires-Dist: httpx>=0.24.0
Provides-Extra: test
Requires-Dist: pytest>=7.0.0; extra == 'test'
Description-Content-Type: text/markdown

# getred
Fetches a Reddit thread in a structured JSON
getred-0.1.3/README.md
ADDED
@@ -0,0 +1,2 @@
# getred
Fetches a Reddit thread in a structured JSON

getred-0.1.3/pyproject.toml
ADDED
@@ -0,0 +1,41 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "getred"
version = "0.1.3"
description = "A CLI tool to fetch Reddit threads and save them as structured JSON"
readme = "README.md"
requires-python = ">=3.8"
license = {text = "MIT"}
authors = [
    { name="Mate Gelei-Szego", email="hello@mategelei.com" }
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
]
dependencies = [
    "click>=8.0.0",
    "httpx>=0.24.0",
]

[project.scripts]
getred = "getred.cli:main"

[project.optional-dependencies]
test = [
    "pytest>=7.0.0",
]

[project.urls]
Homepage = "https://github.com/mgelei/getred"
Issues = "https://github.com/mgelei/getred/issues"

[tool.hatch.build.targets.wheel]
packages = ["src/getred"]

[tool.pytest.ini_options]
testpaths = ["tests"]
getred-0.1.3/src/getred/cli.py
ADDED
@@ -0,0 +1,77 @@
"""Command-line interface for getred."""

import sys
import click
from pathlib import Path
from getred import __version__
from getred.fetcher import RedditFetcher
from getred.parser import parse_thread
from getred.utils import validate_reddit_url, get_default_output_path, save_json


@click.command()
@click.argument('url')
@click.option(
    '-o', '--output',
    type=click.Path(path_type=Path),
    help='Custom output path (default: ~/Downloads/<slug>.json)'
)
@click.option(
    '-p', '--pretty/--no-pretty',
    default=True,
    help='Pretty-print JSON (default: enabled)'
)
@click.option(
    '-q', '--quiet',
    is_flag=True,
    help='Suppress progress output'
)
@click.version_option(version=__version__, prog_name='getred')
def main(url: str, output: Path, pretty: bool, quiet: bool):
    """
    Fetch a Reddit thread and save it as structured JSON.

    URL should be a full Reddit thread URL like:
    https://www.reddit.com/r/python/comments/abc123/title/
    """
    # Validate URL
    if not validate_reddit_url(url):
        click.echo("Error: Invalid Reddit thread URL", err=True)
        click.echo("Expected format: https://www.reddit.com/r/SUBREDDIT/comments/ID/TITLE/", err=True)
        sys.exit(1)

    # Determine output path
    output_path = output if output else get_default_output_path(url)

    if not quiet:
        click.echo(f"Fetching thread from Reddit...")

    try:
        # Fetch thread data
        fetcher = RedditFetcher()
        json_data = fetcher.fetch_thread(url)

        if not quiet:
            click.echo(f"Parsing comments...")

        # Parse into structured format
        thread = parse_thread(json_data)

        if not quiet:
            click.echo(f"Found {thread.comment_count} comments (parsed {len(thread.comments)} top-level)")

        # Save to file
        save_json(thread.to_dict(), output_path, pretty=pretty)

        if not quiet:
            click.echo(f"✓ Saved to: {output_path}")
        else:
            click.echo(str(output_path))

    except Exception as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)


if __name__ == '__main__':
    main()
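The command can also be exercised in-process with Click's test runner (`click.testing.CliRunner` ships with Click, which this package depends on). This is a sketch only: the URL is a placeholder, and the network call inside `fetch_thread` would need to be mocked in a real test.

```python
from click.testing import CliRunner

from getred.cli import main

runner = CliRunner()
# -q prints only the output path on success.
result = runner.invoke(main, ["https://www.reddit.com/r/python/comments/abc123/title/", "-q"])
print(result.exit_code)  # 0 on success; 1 on validation or fetch errors
```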
getred-0.1.3/src/getred/fetcher.py
ADDED
@@ -0,0 +1,38 @@
"""HTTP client for fetching Reddit data."""

import httpx
from typing import Dict, Any


class RedditFetcher:
    """Fetches Reddit thread data using the public JSON API."""

    USER_AGENT = "getred/0.1.0 (Reddit Thread Fetcher CLI)"
    TIMEOUT = 30.0

    def __init__(self):
        """Initialize the fetcher with custom headers."""
        self.headers = {
            "User-Agent": self.USER_AGENT
        }

    def fetch_thread(self, url: str) -> Dict[str, Any]:
        """
        Fetch a Reddit thread as JSON.

        Args:
            url: Reddit thread URL (will be converted to JSON endpoint)

        Returns:
            Dict containing Reddit API response

        Raises:
            httpx.HTTPError: If request fails
        """
        # Ensure URL ends with .json
        json_url = url.rstrip('/') + '.json'

        with httpx.Client(headers=self.headers, timeout=self.TIMEOUT) as client:
            response = client.get(json_url)
            response.raise_for_status()
            return response.json()
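The endpoint derivation is the only URL handling the fetcher does: strip a trailing slash, append `.json`. A quick illustration; note that a URL carrying a query string would still pass `validate_reddit_url` (which only matches a prefix) yet produce a malformed endpoint here, so query strings are best stripped by the caller.

```python
url = "https://www.reddit.com/r/python/comments/abc123/title/"
print(url.rstrip('/') + '.json')
# https://www.reddit.com/r/python/comments/abc123/title.json

# Edge case: a query string ends up before the suffix.
url = "https://www.reddit.com/r/python/comments/abc123/title/?share_id=x"
print(url.rstrip('/') + '.json')
# https://www.reddit.com/r/python/comments/abc123/title/?share_id=x.json
```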
getred-0.1.3/src/getred/models.py
ADDED
@@ -0,0 +1,62 @@
"""Data models for Reddit threads and comments."""

from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any


@dataclass
class Comment:
    """Represents a Reddit comment."""

    id: str
    author: str
    body: str
    score: int
    created_utc: str
    depth: int
    replies: List['Comment'] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert comment to dictionary format."""
        return {
            "id": self.id,
            "author": self.author,
            "body": self.body,
            "score": self.score,
            "created_utc": self.created_utc,
            "depth": self.depth,
            "replies": [reply.to_dict() for reply in self.replies]
        }


@dataclass
class Thread:
    """Represents a Reddit thread."""

    id: str
    title: str
    author: str
    subreddit: str
    url: str
    selftext: str
    score: int
    created_utc: str
    fetched_at: str
    comment_count: int
    comments: List[Comment] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert thread to dictionary format."""
        return {
            "id": self.id,
            "title": self.title,
            "author": self.author,
            "subreddit": self.subreddit,
            "url": self.url,
            "selftext": self.selftext,
            "score": self.score,
            "created_utc": self.created_utc,
            "fetched_at": self.fetched_at,
            "comment_count": self.comment_count,
            "comments": [comment.to_dict() for comment in self.comments]
        }
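Since `Comment.to_dict()` recurses through `replies`, the models serialize straight to JSON with no custom encoder. A minimal round-trip sketch using made-up values:

```python
import json

from getred.models import Comment

# Hypothetical comments for illustration.
reply = Comment(id="c2", author="bob", body="hi", score=1,
                created_utc="2021-01-01T01:00:00Z", depth=1)
top = Comment(id="c1", author="alice", body="hello", score=5,
              created_utc="2021-01-01T00:00:00Z", depth=0, replies=[reply])

print(json.dumps(top.to_dict(), indent=2))  # nested "replies" array included
```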
getred-0.1.3/src/getred/parser.py
ADDED
@@ -0,0 +1,99 @@
"""Parser for Reddit JSON responses."""

from datetime import datetime
from typing import Dict, Any, List
from getred.models import Thread, Comment


def parse_timestamp(timestamp: float) -> str:
    """Convert Unix timestamp to ISO format string."""
    return datetime.utcfromtimestamp(timestamp).isoformat() + 'Z'


def parse_comment(comment_data: Dict[str, Any], depth: int = 0) -> Comment:
    """
    Parse a comment from Reddit JSON data.

    Args:
        comment_data: Raw comment data from Reddit API
        depth: Nesting depth of the comment

    Returns:
        Comment object with nested replies
    """
    data = comment_data.get('data', {})

    # Handle deleted/removed comments
    author = data.get('author', '[deleted]')
    body = data.get('body', '[deleted]')

    comment = Comment(
        id=data.get('id', ''),
        author=author,
        body=body,
        score=data.get('score', 0),
        created_utc=parse_timestamp(data.get('created_utc', 0)),
        depth=depth,
        replies=[]
    )

    # Parse nested replies
    replies_data = data.get('replies')
    if replies_data and isinstance(replies_data, dict):
        replies_listing = replies_data.get('data', {}).get('children', [])
        for reply_data in replies_listing:
            # Skip "more" objects that indicate additional comments
            if reply_data.get('kind') == 't1':
                comment.replies.append(parse_comment(reply_data, depth + 1))

    return comment


def parse_comments(comments_listing: List[Dict[str, Any]]) -> List[Comment]:
    """
    Parse all top-level comments from the comments listing.

    Args:
        comments_listing: List of comment objects from Reddit API

    Returns:
        List of Comment objects
    """
    comments = []
    for item in comments_listing:
        # Only parse actual comments (kind = t1), skip "more" objects
        if item.get('kind') == 't1':
            comments.append(parse_comment(item, depth=0))

    return comments


def parse_thread(json_data: List[Dict[str, Any]]) -> Thread:
    """
    Parse a Reddit thread from JSON response.

    Args:
        json_data: Raw JSON response from Reddit API (list with 2 elements)

    Returns:
        Thread object with all data and nested comments
    """
    # Reddit API returns [post_data, comments_data]
    post_listing = json_data[0]['data']['children'][0]['data']
    comments_listing = json_data[1]['data']['children']

    thread = Thread(
        id=post_listing.get('id', ''),
        title=post_listing.get('title', ''),
        author=post_listing.get('author', '[deleted]'),
        subreddit=post_listing.get('subreddit', ''),
        url=post_listing.get('url', ''),
        selftext=post_listing.get('selftext', ''),
        score=post_listing.get('score', 0),
        created_utc=parse_timestamp(post_listing.get('created_utc', 0)),
        fetched_at=datetime.utcnow().isoformat() + 'Z',
        comment_count=post_listing.get('num_comments', 0),
        comments=parse_comments(comments_listing)
    )

    return thread
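One portability note on the parser: `datetime.utcfromtimestamp` and `datetime.utcnow` still work but are deprecated as of Python 3.12 in favor of timezone-aware calls. The timestamp helper behaves like this, with a timezone-aware sketch (not what this release does) alongside:

```python
from getred.parser import parse_timestamp

print(parse_timestamp(1609459200.0))  # 2021-01-01T00:00:00Z

# Timezone-aware equivalent producing the same string (sketch only):
from datetime import datetime, timezone
iso = datetime.fromtimestamp(1609459200.0, tz=timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
print(iso)  # 2021-01-01T00:00:00Z
```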
getred-0.1.3/src/getred/utils.py
ADDED
@@ -0,0 +1,83 @@
"""Utility functions for URL validation, slug generation, and file operations."""

import json
import re
from pathlib import Path
from typing import Dict, Any


def validate_reddit_url(url: str) -> bool:
    """
    Validate that a URL is a Reddit thread URL.

    Args:
        url: URL to validate

    Returns:
        True if valid Reddit thread URL, False otherwise
    """
    pattern = r'^https?://(www\.)?reddit\.com/r/[^/]+/comments/[^/]+/'
    return bool(re.match(pattern, url))


def generate_slug(url: str) -> str:
    """
    Generate a filename slug from a Reddit URL.

    Extracts the thread ID and title from the URL.
    Example: https://reddit.com/r/python/comments/abc123/cool_title/
    Returns: abc123_cool_title

    Args:
        url: Reddit thread URL

    Returns:
        Slug string suitable for filename
    """
    # Extract thread ID and title from URL
    # Pattern: /r/subreddit/comments/ID/title/
    match = re.search(r'/comments/([^/]+)/([^/]+)', url)
    if match:
        thread_id = match.group(1)
        title_slug = match.group(2)
        return f"{thread_id}_{title_slug}"

    # Fallback to just using the thread ID
    match = re.search(r'/comments/([^/]+)', url)
    if match:
        return match.group(1)

    return "reddit_thread"


def get_default_output_path(url: str) -> Path:
    """
    Generate default output path in ~/Downloads.

    Args:
        url: Reddit thread URL

    Returns:
        Path object for output file
    """
    downloads_dir = Path.home() / "Downloads"
    slug = generate_slug(url)
    return downloads_dir / f"{slug}.json"


def save_json(data: Dict[str, Any], output_path: Path, pretty: bool = True) -> None:
    """
    Save data as JSON file.

    Args:
        data: Dictionary to save
        output_path: Path where to save the file
        pretty: Whether to pretty-print the JSON (default: True)
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        if pretty:
            json.dump(data, f, indent=2, ensure_ascii=False)
        else:
            json.dump(data, f, ensure_ascii=False)
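These helpers compose into the default-output behavior documented in CLAUDE.md. A quick check against the URL shape used throughout this package:

```python
from getred.utils import validate_reddit_url, generate_slug, get_default_output_path

url = "https://www.reddit.com/r/python/comments/abc123/cool_title/"
print(validate_reddit_url(url))      # True
print(generate_slug(url))            # abc123_cool_title
print(get_default_output_path(url))  # <home>/Downloads/abc123_cool_title.json
```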
getred-0.1.3/tests/__init__.py
ADDED
@@ -0,0 +1 @@
"""Test suite for getred."""
getred-0.1.3/tests/conftest.py
ADDED
@@ -0,0 +1,105 @@
"""Shared fixtures for getred tests."""

import pytest


@pytest.fixture
def sample_comment_data():
    """Minimal comment structure from Reddit API."""
    return {
        "kind": "t1",
        "data": {
            "id": "comment123",
            "author": "test_user",
            "body": "This is a test comment",
            "score": 42,
            "created_utc": 1609459200.0,  # 2021-01-01 00:00:00 UTC
            "replies": ""
        }
    }


@pytest.fixture
def sample_comment_with_replies():
    """Comment with nested reply."""
    return {
        "kind": "t1",
        "data": {
            "id": "parent123",
            "author": "parent_user",
            "body": "Parent comment",
            "score": 100,
            "created_utc": 1609459200.0,
            "replies": {
                "kind": "Listing",
                "data": {
                    "children": [
                        {
                            "kind": "t1",
                            "data": {
                                "id": "child123",
                                "author": "child_user",
                                "body": "Child comment",
                                "score": 50,
                                "created_utc": 1609462800.0,  # 1 hour later
                                "replies": ""
                            }
                        }
                    ]
                }
            }
        }
    }


@pytest.fixture
def sample_thread_json():
    """Full 2-element Reddit API response."""
    return [
        {
            "kind": "Listing",
            "data": {
                "children": [
                    {
                        "kind": "t3",
                        "data": {
                            "id": "thread123",
                            "title": "Test Thread Title",
                            "author": "thread_author",
                            "subreddit": "python",
                            "url": "https://reddit.com/r/python/comments/thread123/test_thread_title/",
                            "selftext": "This is the thread body",
                            "score": 500,
                            "created_utc": 1609459200.0,
                            "num_comments": 2
                        }
                    }
                ]
            }
        },
        {
            "kind": "Listing",
            "data": {
                "children": [
                    {
                        "kind": "t1",
                        "data": {
                            "id": "comment1",
                            "author": "user1",
                            "body": "First comment",
                            "score": 10,
                            "created_utc": 1609462800.0,
                            "replies": ""
                        }
                    },
                    {
                        "kind": "more",
                        "data": {
                            "count": 5,
                            "children": ["abc", "def"]
                        }
                    }
                ]
            }
        }
    ]
getred-0.1.3/tests/test_models.py
ADDED
@@ -0,0 +1,108 @@
"""Tests for data models."""

import pytest
from getred.models import Comment, Thread


class TestCommentToDict:
    """Tests for Comment.to_dict() method."""

    def test_comment_to_dict(self):
        """Serializes comment fields correctly."""
        comment = Comment(
            id="test123",
            author="test_user",
            body="Test comment body",
            score=42,
            created_utc="2021-01-01T00:00:00Z",
            depth=0,
            replies=[]
        )

        result = comment.to_dict()

        assert result == {
            "id": "test123",
            "author": "test_user",
            "body": "Test comment body",
            "score": 42,
            "created_utc": "2021-01-01T00:00:00Z",
            "depth": 0,
            "replies": []
        }

    def test_comment_to_dict_nested(self):
        """Serializes nested replies recursively."""
        child = Comment(
            id="child123",
            author="child_user",
            body="Child comment",
            score=10,
            created_utc="2021-01-01T01:00:00Z",
            depth=1,
            replies=[]
        )

        parent = Comment(
            id="parent123",
            author="parent_user",
            body="Parent comment",
            score=50,
            created_utc="2021-01-01T00:00:00Z",
            depth=0,
            replies=[child]
        )

        result = parent.to_dict()

        assert result["id"] == "parent123"
        assert result["depth"] == 0
        assert len(result["replies"]) == 1
        assert result["replies"][0]["id"] == "child123"
        assert result["replies"][0]["depth"] == 1
        assert result["replies"][0]["replies"] == []


class TestThreadToDict:
    """Tests for Thread.to_dict() method."""

    def test_thread_to_dict(self):
        """Serializes thread with comments."""
        comment = Comment(
            id="comment123",
            author="commenter",
            body="Great post!",
            score=25,
            created_utc="2021-01-01T01:00:00Z",
            depth=0,
            replies=[]
        )

        thread = Thread(
            id="thread123",
            title="Test Thread",
            author="thread_author",
            subreddit="python",
            url="https://reddit.com/r/python/comments/thread123/test_thread/",
            selftext="Thread body content",
            score=500,
            created_utc="2021-01-01T00:00:00Z",
            fetched_at="2021-01-01T02:00:00Z",
            comment_count=1,
            comments=[comment]
        )

        result = thread.to_dict()

        assert result["id"] == "thread123"
        assert result["title"] == "Test Thread"
        assert result["author"] == "thread_author"
        assert result["subreddit"] == "python"
        assert result["url"] == "https://reddit.com/r/python/comments/thread123/test_thread/"
        assert result["selftext"] == "Thread body content"
        assert result["score"] == 500
        assert result["created_utc"] == "2021-01-01T00:00:00Z"
        assert result["fetched_at"] == "2021-01-01T02:00:00Z"
        assert result["comment_count"] == 1
        assert len(result["comments"]) == 1
        assert result["comments"][0]["id"] == "comment123"
getred-0.1.3/tests/test_parser.py
ADDED
@@ -0,0 +1,132 @@
"""Tests for Reddit JSON parser."""

import pytest
from getred.parser import parse_timestamp, parse_comment, parse_comments, parse_thread
from getred.models import Comment, Thread


class TestParseTimestamp:
    """Tests for parse_timestamp function."""

    def test_parse_timestamp(self):
        """Unix timestamp converts to ISO format."""
        timestamp = 1609459200.0  # 2021-01-01 00:00:00 UTC
        result = parse_timestamp(timestamp)
        assert result == "2021-01-01T00:00:00Z"


class TestParseComment:
    """Tests for parse_comment function."""

    def test_parse_comment_basic(self, sample_comment_data):
        """Parses comment fields correctly."""
        comment = parse_comment(sample_comment_data, depth=0)

        assert comment.id == "comment123"
        assert comment.author == "test_user"
        assert comment.body == "This is a test comment"
        assert comment.score == 42
        assert comment.created_utc == "2021-01-01T00:00:00Z"
        assert comment.depth == 0
        assert comment.replies == []

    def test_parse_comment_with_replies(self, sample_comment_with_replies):
        """Handles nested replies and depth tracking."""
        parent = parse_comment(sample_comment_with_replies, depth=0)

        assert parent.id == "parent123"
        assert parent.author == "parent_user"
        assert parent.depth == 0
        assert len(parent.replies) == 1

        child = parent.replies[0]
        assert child.id == "child123"
        assert child.author == "child_user"
        assert child.body == "Child comment"
        assert child.depth == 1
        assert child.replies == []

    def test_parse_comment_deleted(self):
        """Handles deleted/removed comments."""
        deleted_data = {
            "kind": "t1",
            "data": {
                "id": "deleted123",
                "author": "[deleted]",
                "body": "[removed]",
                "score": 0,
                "created_utc": 1609459200.0,
                "replies": ""
            }
        }
        comment = parse_comment(deleted_data, depth=0)

        assert comment.author == "[deleted]"
        assert comment.body == "[removed]"


class TestParseComments:
    """Tests for parse_comments function."""

    def test_parse_comments_filters_more(self):
        """Skips 'more' objects (kind != t1)."""
        comments_listing = [
            {
                "kind": "t1",
                "data": {
                    "id": "comment1",
                    "author": "user1",
                    "body": "Valid comment",
                    "score": 10,
                    "created_utc": 1609459200.0,
                    "replies": ""
                }
            },
            {
                "kind": "more",
                "data": {
                    "count": 5,
                    "children": ["abc", "def"]
                }
            },
            {
                "kind": "t1",
                "data": {
                    "id": "comment2",
                    "author": "user2",
                    "body": "Another valid comment",
                    "score": 20,
                    "created_utc": 1609462800.0,
                    "replies": ""
                }
            }
        ]

        comments = parse_comments(comments_listing)

        assert len(comments) == 2
        assert comments[0].id == "comment1"
        assert comments[1].id == "comment2"


class TestParseThread:
    """Tests for parse_thread function."""

    def test_parse_thread(self, sample_thread_json):
        """Full thread parsing with metadata and comments."""
        thread = parse_thread(sample_thread_json)

        assert thread.id == "thread123"
        assert thread.title == "Test Thread Title"
        assert thread.author == "thread_author"
        assert thread.subreddit == "python"
        assert thread.url == "https://reddit.com/r/python/comments/thread123/test_thread_title/"
        assert thread.selftext == "This is the thread body"
        assert thread.score == 500
        assert thread.created_utc == "2021-01-01T00:00:00Z"
        assert thread.comment_count == 2
        assert thread.fetched_at.endswith("Z")

        # Should only parse t1 comments, not 'more' objects
        assert len(thread.comments) == 1
        assert thread.comments[0].id == "comment1"
getred-0.1.3/tests/test_utils.py
ADDED
@@ -0,0 +1,56 @@
"""Tests for utility functions."""

import pytest
from getred.utils import validate_reddit_url, generate_slug


class TestValidateRedditUrl:
    """Tests for validate_reddit_url function."""

    def test_validate_reddit_url_valid(self):
        """Valid URLs with/without www should pass validation."""
        valid_urls = [
            "https://reddit.com/r/python/comments/abc123/cool_title/",
            "https://www.reddit.com/r/python/comments/abc123/cool_title/",
            "http://reddit.com/r/AskReddit/comments/xyz789/interesting_question/",
            "https://reddit.com/r/programming/comments/test123/test/extra/path/",
        ]
        for url in valid_urls:
            assert validate_reddit_url(url), f"Expected {url} to be valid"

    def test_validate_reddit_url_invalid(self):
        """Non-Reddit and malformed URLs should fail validation."""
        invalid_urls = [
            "https://example.com/r/python/comments/abc123/",
            "https://reddit.com/r/python/",  # Missing /comments/
            "https://reddit.com/comments/abc123/",  # Missing /r/subreddit/
            "not a url",
            "",
            "ftp://reddit.com/r/python/comments/abc123/",
        ]
        for url in invalid_urls:
            assert not validate_reddit_url(url), f"Expected {url} to be invalid"


class TestGenerateSlug:
    """Tests for generate_slug function."""

    def test_generate_slug_full_url(self):
        """Extracts {id}_{title} from complete URL."""
        url = "https://reddit.com/r/python/comments/abc123/cool_python_feature/"
        assert generate_slug(url) == "abc123_cool_python_feature"

    def test_generate_slug_id_only(self):
        """Handles URL without title, returns only ID."""
        url = "https://reddit.com/r/python/comments/xyz789/"
        assert generate_slug(url) == "xyz789"

    def test_generate_slug_with_extra_path(self):
        """Handles URLs with additional path segments."""
        url = "https://reddit.com/r/AskReddit/comments/test123/interesting_question/extra/path/"
        assert generate_slug(url) == "test123_interesting_question"

    def test_generate_slug_fallback(self):
        """Returns default for malformed URLs."""
        url = "https://reddit.com/not/a/valid/url/"
        assert generate_slug(url) == "reddit_thread"