fourchan-local 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fourchan_local-1.0.0/LICENSE +21 -0
- fourchan_local-1.0.0/PKG-INFO +186 -0
- fourchan_local-1.0.0/README.md +153 -0
- fourchan_local-1.0.0/fourchan_local/__init__.py +8 -0
- fourchan_local-1.0.0/fourchan_local/app.py +706 -0
- fourchan_local-1.0.0/fourchan_local/cli.py +512 -0
- fourchan_local-1.0.0/fourchan_local/db.py +311 -0
- fourchan_local-1.0.0/fourchan_local/fourchan.py +107 -0
- fourchan_local-1.0.0/fourchan_local/htmlstrip.py +22 -0
- fourchan_local-1.0.0/fourchan_local/media.py +151 -0
- fourchan_local-1.0.0/fourchan_local/poller.py +112 -0
- fourchan_local-1.0.0/fourchan_local/retention.py +188 -0
- fourchan_local-1.0.0/fourchan_local/schema.sqlite.sql +138 -0
- fourchan_local-1.0.0/fourchan_local/static/4chan.css +1734 -0
- fourchan_local-1.0.0/fourchan_local/static/app.js +97 -0
- fourchan_local-1.0.0/fourchan_local/static/style.css +63 -0
- fourchan_local-1.0.0/fourchan_local/templates/_boardhead.html +10 -0
- fourchan_local-1.0.0/fourchan_local/templates/_post.html +62 -0
- fourchan_local-1.0.0/fourchan_local/templates/base.html +36 -0
- fourchan_local-1.0.0/fourchan_local/templates/board.html +28 -0
- fourchan_local-1.0.0/fourchan_local/templates/catalog.html +49 -0
- fourchan_local-1.0.0/fourchan_local/templates/index.html +18 -0
- fourchan_local-1.0.0/fourchan_local/templates/pins.html +73 -0
- fourchan_local-1.0.0/fourchan_local/templates/search.html +35 -0
- fourchan_local-1.0.0/fourchan_local/templates/thread.html +14 -0
- fourchan_local-1.0.0/fourchan_local.egg-info/PKG-INFO +186 -0
- fourchan_local-1.0.0/fourchan_local.egg-info/SOURCES.txt +31 -0
- fourchan_local-1.0.0/fourchan_local.egg-info/dependency_links.txt +1 -0
- fourchan_local-1.0.0/fourchan_local.egg-info/entry_points.txt +2 -0
- fourchan_local-1.0.0/fourchan_local.egg-info/requires.txt +6 -0
- fourchan_local-1.0.0/fourchan_local.egg-info/top_level.txt +1 -0
- fourchan_local-1.0.0/pyproject.toml +53 -0
- fourchan_local-1.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ilja Adamenko
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fourchan-local
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Mirror 4chan boards to your PC and browse them locally with full-text search.
|
|
5
|
+
Author: Ilja Adamenko
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Iljaadam/4chan-local
|
|
8
|
+
Project-URL: Repository, https://github.com/Iljaadam/4chan-local
|
|
9
|
+
Project-URL: Issues, https://github.com/Iljaadam/4chan-local/issues
|
|
10
|
+
Keywords: 4chan,archive,mirror,scraper,sqlite,full-text-search,self-hosted
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Environment :: Web Environment
|
|
14
|
+
Classifier: Framework :: FastAPI
|
|
15
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
|
22
|
+
Classifier: Topic :: System :: Archiving
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: httpx==0.27.2
|
|
27
|
+
Requires-Dist: platformdirs==4.3.6
|
|
28
|
+
Requires-Dist: fastapi==0.115.5
|
|
29
|
+
Requires-Dist: uvicorn[standard]==0.32.1
|
|
30
|
+
Requires-Dist: jinja2==3.1.4
|
|
31
|
+
Requires-Dist: markupsafe>=2.1
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# fourchan-local
|
|
35
|
+
|
|
36
|
+
Run your **own local copy of 4chan** on your PC. Pick the boards you want; it mirrors
|
|
37
|
+
them live, gives you a local web UI to browse and full-text search, and lets you
|
|
38
|
+
**pin** threads to keep them forever.
|
|
39
|
+
|
|
40
|
+
The point: 4chan purges threads constantly. This mirrors what's live to your machine
|
|
41
|
+
and — when a thread finally 404s — throws it away **unless you pinned it**. Disk stays
|
|
42
|
+
bounded: whatever's currently live on your boards, plus your saved set. No unbounded
|
|
43
|
+
archive, no server, no account.
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
4cl init # pick boards, media phase, review the blocklist
|
|
47
|
+
4cl start # mirror + serve the UI
|
|
48
|
+
# open localhost:8080, browse, click 📌 to keep a thread past its 404
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
> ⚠️ **Legal & responsible use — read before enabling media.** This tool
|
|
52
|
+
> **downloads files from 4chan onto your machine automatically**. You run it, so you
|
|
53
|
+
> are the operator and are responsible for what lands on your disk under your local
|
|
54
|
+
> law. It ships with a **default-on blocklist** that skips file *bytes* from
|
|
55
|
+
> photographic/anonymous-upload boards (`b, soc, r, hc, gif, s, t`) where illegal
|
|
56
|
+
> content — including CSAM — is regularly live before moderators remove it. **Do not
|
|
57
|
+
> disable that blocklist** unless you fully understand the legal exposure in your
|
|
58
|
+
> jurisdiction. See [Content blocklist](#content-blocklist). No warranty; not
|
|
59
|
+
> affiliated with 4chan; respect their [API rules](https://github.com/4chan/4chan-API)
|
|
60
|
+
> (the poller caps at ≤1 req/s — don't raise it).
|
|
61
|
+
|
|
62
|
+
## Model
|
|
63
|
+
|
|
64
|
+
- **Mirror + pin.** Live threads are mirrored to a local DB + media store. When a
|
|
65
|
+
thread 404s on 4chan it's purged locally too — **unless pinned**, in which case it
|
|
66
|
+
(and its media) is kept indefinitely.
|
|
67
|
+
- **Bounded disk.** Steady state = live-stock of your boards + your pins. Pick a few
|
|
68
|
+
boards → tens of GB that stays flat, growing only with what you pin. (For scale
|
|
69
|
+
context: *all* media live across all 77 boards at any instant is ~380 GB; a typical
|
|
70
|
+
2–5 board pick is ~50–100 GB.)
|
|
71
|
+
- **Local only.** No public surface, no accounts. UI on `localhost`.
|
|
72
|
+
|
|
73
|
+
## What runs
|
|
74
|
+
|
|
75
|
+
- **scraper** — Python, polls `a.4cdn.org` at ≤1 req/s, diffs `threads.json`,
|
|
76
|
+
fetches only changed threads. Mirror+pin GC purges 404'd-unpinned threads.
|
|
77
|
+
- **media** — worker downloads files into a content-addressed store, deduped by md5.
|
|
78
|
+
- **web** — FastAPI + Jinja. Board index → catalog → thread, plus FTS search. Serves
|
|
79
|
+
`/media` itself (byte-range/seek supported) — no nginx.
|
|
80
|
+
|
|
81
|
+
Single SQLite file (WAL, FTS5) + on-disk media store under your OS data dir. No
|
|
82
|
+
Docker, no Postgres, no nginx.
|
|
83
|
+
|
|
84
|
+
## Run (local CLI — `4cl`)
|
|
85
|
+
|
|
86
|
+
The `4cl` CLI drives the whole thing. It stores the DB + media under your OS data
|
|
87
|
+
dir (`~/.local/share/fourchan-local/` on Linux).
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pipx install fourchan-local # isolated CLI install; exposes the `4cl` command
|
|
91
|
+
4cl init # first-run wizard: boards, media phase, blocklist
|
|
92
|
+
4cl start # supervise scraper + media + web, UI on :8080
|
|
93
|
+
# browse http://127.0.0.1:8080, Ctrl-C to stop (or `4cl stop` from elsewhere)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
`4cl init` walks you through picking boards, the media phase, and reviewing the
|
|
97
|
+
media-bytes **blocklist** (see below). `4cl start` on a fresh install runs the same
|
|
98
|
+
wizard automatically. For local hacking, `pip install -e .` from a checkout works
|
|
99
|
+
the same; `pip install fourchan-local` (into a venv) is the non-isolated alternative.
|
|
100
|
+
|
|
101
|
+
| Command | Does |
|
|
102
|
+
|---------|------|
|
|
103
|
+
| `4cl init` | first-run setup wizard (boards, media, blocklist) |
|
|
104
|
+
| `4cl boards add <b>…` | enable boards (media off for blocklisted boards) |
|
|
105
|
+
| `4cl boards rm <b>…` | disable a board, keeping its archived data |
|
|
106
|
+
| `4cl boards list` | show boards + state |
|
|
107
|
+
| `4cl start [--port N]` | run poller + media worker + UI together (localhost) |
|
|
108
|
+
| `4cl stop` | stop a running mirror |
|
|
109
|
+
| `4cl status` | boards, disk used, blocklist, live vs 404'd vs pinned counts |
|
|
110
|
+
| `4cl gc [--dry-run]` | purge 404'd-unpinned threads + orphan media now |
|
|
111
|
+
| `4cl config media thumbs\|full\|off` | per-install media phase |
|
|
112
|
+
| `4cl config blocklist [<b>… \| none]` | show/set boards whose file bytes are skipped |
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
## Config
|
|
116
|
+
|
|
117
|
+
Primary config is the `4cl` CLI (`init`, `boards`, `config`) — it writes the DB.
|
|
118
|
+
Everything else is **optional environment overrides** (see `.env.example`); nothing
|
|
119
|
+
auto-loads a file, so `export` them or prefix a command to use them.
|
|
120
|
+
|
|
121
|
+
| Var | Meaning |
|
|
122
|
+
|-----|---------|
|
|
123
|
+
| `FOURCHAN_DB` / `MEDIA_STORE` | override the DB file / media dir (blank = OS data dir) |
|
|
124
|
+
| `POLL_INTERVAL` | seconds between full poll cycles |
|
|
125
|
+
| `REQ_PER_SEC` / `REQ_PER_SEC_MEDIA` | API / media-CDN rate caps. Keep ≤ 1 (4chan rule). |
|
|
126
|
+
| `PURGE_GRACE` | seconds a 404'd, unpinned thread stays before GC purges it |
|
|
127
|
+
| `BOARDS`, `MEDIA_PHASE`, `MEDIA_BLOCKLIST` | normally set via `4cl`; env only overrides a manual poller/media run |
|
|
128
|
+
|
|
129
|
+
The UI port is set with `4cl start --port N` (the server binds to `127.0.0.1` only).
|
|
130
|
+
|
|
131
|
+
## Media store
|
|
132
|
+
|
|
133
|
+
Content-addressed, deduplicated by 4chan-supplied md5:
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
/media/thumb/<ab>/<cd>/<md5hex>.jpg # thumbnails
|
|
137
|
+
/media/full/<ab>/<cd>/<md5hex><ext> # full files (full phase)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
The web app never modifies the stored bytes; it only builds URLs from the DB and serves the files under `/media`.
|
|
141
|
+
|
|
142
|
+
### Content blocklist
|
|
143
|
+
|
|
144
|
+
The media worker **downloads file bytes to your machine automatically**, on a timer,
|
|
145
|
+
before you ever open a page. The blocklist names boards whose bytes are therefore
|
|
146
|
+
**never** downloaded (their text + file manifest are still captured). The default set
|
|
147
|
+
targets photographic/anonymous-upload boards (`b, soc, r, hc, gif, s, t`) where
|
|
148
|
+
illegal content — including CSAM — is regularly live before mods remove it; on a
|
|
149
|
+
**local single-user tool** that content would land on *your* disk and *your* legal
|
|
150
|
+
exposure.
|
|
151
|
+
|
|
152
|
+
So it ships **default-on**. `4cl init` shows it during setup; `4cl config blocklist`
|
|
153
|
+
edits it (`4cl config blocklist none` clears it, behind a typed confirmation). The
|
|
154
|
+
persisted list is handed to the poller, which sets each board's `fetch_media`. Review
|
|
155
|
+
it for your jurisdiction before widening media. (`MEDIA_BLOCKLIST` env still overrides
|
|
156
|
+
per-run for advanced/manual use.)
|
|
157
|
+
|
|
158
|
+
## Install
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
pipx install git+https://github.com/Iljaadam/4chan-local # isolated CLI
|
|
162
|
+
# or, from a checkout, for hacking:
|
|
163
|
+
git clone https://github.com/Iljaadam/4chan-local && cd 4chan-local
|
|
164
|
+
pip install -e .
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Python ≥ 3.10, cross-platform (data lives under your OS data dir). A PyPI release
|
|
168
|
+
(`pipx install fourchan-local`) is also available.
|
|
169
|
+
|
|
170
|
+
## Contributing
|
|
171
|
+
|
|
172
|
+
Issues and PRs welcome — see [CONTRIBUTING.md](CONTRIBUTING.md) for dev setup and
|
|
173
|
+
scope. Please **do not** open PRs that weaken the media blocklist default or add a
|
|
174
|
+
path that downloads bytes from the blocked boards by default.
|
|
175
|
+
|
|
176
|
+
## Roadmap
|
|
177
|
+
|
|
178
|
+
Pivot plan, phased: [`docs/ROADMAP-local-tool.md`](docs/ROADMAP-local-tool.md).
|
|
179
|
+
Short version — P0 reframe → P1 SQLite port → P2 retention/GC → P3 pin UI →
|
|
180
|
+
P4 `4cl` CLI → P5 drop Docker/nginx → P6 pip package — **all done.**
|
|
181
|
+
|
|
182
|
+
## License
|
|
183
|
+
|
|
184
|
+
[MIT](LICENSE) © Ilja Adamenko. Provided **as-is, without warranty**. Not affiliated
|
|
185
|
+
with, endorsed by, or connected to 4chan. Using it to download content is your
|
|
186
|
+
responsibility under your local law.
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# fourchan-local
|
|
2
|
+
|
|
3
|
+
Run your **own local copy of 4chan** on your PC. Pick the boards you want; it mirrors
|
|
4
|
+
them live, gives you a local web UI to browse and full-text search, and lets you
|
|
5
|
+
**pin** threads to keep them forever.
|
|
6
|
+
|
|
7
|
+
The point: 4chan purges threads constantly. This mirrors what's live to your machine
|
|
8
|
+
and — when a thread finally 404s — throws it away **unless you pinned it**. Disk stays
|
|
9
|
+
bounded: whatever's currently live on your boards, plus your saved set. No unbounded
|
|
10
|
+
archive, no server, no account.
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
4cl init # pick boards, media phase, review the blocklist
|
|
14
|
+
4cl start # mirror + serve the UI
|
|
15
|
+
# open localhost:8080, browse, click 📌 to keep a thread past its 404
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
> ⚠️ **Legal & responsible use — read before enabling media.** This tool
|
|
19
|
+
> **downloads files from 4chan onto your machine automatically**. You run it, so you
|
|
20
|
+
> are the operator and are responsible for what lands on your disk under your local
|
|
21
|
+
> law. It ships with a **default-on blocklist** that skips file *bytes* from
|
|
22
|
+
> photographic/anonymous-upload boards (`b, soc, r, hc, gif, s, t`) where illegal
|
|
23
|
+
> content — including CSAM — is regularly live before moderators remove it. **Do not
|
|
24
|
+
> disable that blocklist** unless you fully understand the legal exposure in your
|
|
25
|
+
> jurisdiction. See [Content blocklist](#content-blocklist). No warranty; not
|
|
26
|
+
> affiliated with 4chan; respect their [API rules](https://github.com/4chan/4chan-API)
|
|
27
|
+
> (the poller caps at ≤1 req/s — don't raise it).
|
|
28
|
+
|
|
29
|
+
## Model
|
|
30
|
+
|
|
31
|
+
- **Mirror + pin.** Live threads are mirrored to a local DB + media store. When a
|
|
32
|
+
thread 404s on 4chan it's purged locally too — **unless pinned**, in which case it
|
|
33
|
+
(and its media) is kept indefinitely.
|
|
34
|
+
- **Bounded disk.** Steady state = live-stock of your boards + your pins. Pick a few
|
|
35
|
+
boards → tens of GB that stays flat, growing only with what you pin. (For scale
|
|
36
|
+
context: *all* media live across all 77 boards at any instant is ~380 GB; a typical
|
|
37
|
+
2–5 board pick is ~50–100 GB.)
|
|
38
|
+
- **Local only.** No public surface, no accounts. UI on `localhost`.
|
|
39
|
+
|
|
40
|
+
## What runs
|
|
41
|
+
|
|
42
|
+
- **scraper** — Python, polls `a.4cdn.org` at ≤1 req/s, diffs `threads.json`,
|
|
43
|
+
fetches only changed threads. Mirror+pin GC purges 404'd-unpinned threads.
|
|
44
|
+
- **media** — worker downloads files into a content-addressed store, deduped by md5.
|
|
45
|
+
- **web** — FastAPI + Jinja. Board index → catalog → thread, plus FTS search. Serves
|
|
46
|
+
`/media` itself (byte-range/seek supported) — no nginx.
|
|
47
|
+
|
|
48
|
+
Single SQLite file (WAL, FTS5) + on-disk media store under your OS data dir. No
|
|
49
|
+
Docker, no Postgres, no nginx.
|
|
50
|
+
|
|
51
|
+
## Run (local CLI — `4cl`)
|
|
52
|
+
|
|
53
|
+
The `4cl` CLI drives the whole thing. It stores the DB + media under your OS data
|
|
54
|
+
dir (`~/.local/share/fourchan-local/` on Linux).
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pipx install fourchan-local # isolated CLI install; exposes the `4cl` command
|
|
58
|
+
4cl init # first-run wizard: boards, media phase, blocklist
|
|
59
|
+
4cl start # supervise scraper + media + web, UI on :8080
|
|
60
|
+
# browse http://127.0.0.1:8080, Ctrl-C to stop (or `4cl stop` from elsewhere)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
`4cl init` walks you through picking boards, the media phase, and reviewing the
|
|
64
|
+
media-bytes **blocklist** (see below). `4cl start` on a fresh install runs the same
|
|
65
|
+
wizard automatically. For local hacking, `pip install -e .` from a checkout works
|
|
66
|
+
the same; `pip install fourchan-local` (into a venv) is the non-isolated alternative.
|
|
67
|
+
|
|
68
|
+
| Command | Does |
|
|
69
|
+
|---------|------|
|
|
70
|
+
| `4cl init` | first-run setup wizard (boards, media, blocklist) |
|
|
71
|
+
| `4cl boards add <b>…` | enable boards (media off for blocklisted boards) |
|
|
72
|
+
| `4cl boards rm <b>…` | disable a board, keeping its archived data |
|
|
73
|
+
| `4cl boards list` | show boards + state |
|
|
74
|
+
| `4cl start [--port N]` | run poller + media worker + UI together (localhost) |
|
|
75
|
+
| `4cl stop` | stop a running mirror |
|
|
76
|
+
| `4cl status` | boards, disk used, blocklist, live vs 404'd vs pinned counts |
|
|
77
|
+
| `4cl gc [--dry-run]` | purge 404'd-unpinned threads + orphan media now |
|
|
78
|
+
| `4cl config media thumbs\|full\|off` | per-install media phase |
|
|
79
|
+
| `4cl config blocklist [<b>… \| none]` | show/set boards whose file bytes are skipped |
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
## Config
|
|
83
|
+
|
|
84
|
+
Primary config is the `4cl` CLI (`init`, `boards`, `config`) — it writes the DB.
|
|
85
|
+
Everything else is **optional environment overrides** (see `.env.example`); nothing
|
|
86
|
+
auto-loads a file, so `export` them or prefix a command to use them.
|
|
87
|
+
|
|
88
|
+
| Var | Meaning |
|
|
89
|
+
|-----|---------|
|
|
90
|
+
| `FOURCHAN_DB` / `MEDIA_STORE` | override the DB file / media dir (blank = OS data dir) |
|
|
91
|
+
| `POLL_INTERVAL` | seconds between full poll cycles |
|
|
92
|
+
| `REQ_PER_SEC` / `REQ_PER_SEC_MEDIA` | API / media-CDN rate caps. Keep ≤ 1 (4chan rule). |
|
|
93
|
+
| `PURGE_GRACE` | seconds a 404'd, unpinned thread stays before GC purges it |
|
|
94
|
+
| `BOARDS`, `MEDIA_PHASE`, `MEDIA_BLOCKLIST` | normally set via `4cl`; env only overrides a manual poller/media run |
|
|
95
|
+
|
|
96
|
+
The UI port is set with `4cl start --port N` (the server binds to `127.0.0.1` only).
|
|
97
|
+
|
|
98
|
+
## Media store
|
|
99
|
+
|
|
100
|
+
Content-addressed, deduplicated by 4chan-supplied md5:
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
/media/thumb/<ab>/<cd>/<md5hex>.jpg # thumbnails
|
|
104
|
+
/media/full/<ab>/<cd>/<md5hex><ext> # full files (full phase)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
The web app never modifies the stored bytes; it only builds URLs from the DB and serves the files under `/media`.
|
|
108
|
+
|
|
109
|
+
### Content blocklist
|
|
110
|
+
|
|
111
|
+
The media worker **downloads file bytes to your machine automatically**, on a timer,
|
|
112
|
+
before you ever open a page. The blocklist names boards whose bytes are therefore
|
|
113
|
+
**never** downloaded (their text + file manifest are still captured). The default set
|
|
114
|
+
targets photographic/anonymous-upload boards (`b, soc, r, hc, gif, s, t`) where
|
|
115
|
+
illegal content — including CSAM — is regularly live before mods remove it; on a
|
|
116
|
+
**local single-user tool** that content would land on *your* disk and *your* legal
|
|
117
|
+
exposure.
|
|
118
|
+
|
|
119
|
+
So it ships **default-on**. `4cl init` shows it during setup; `4cl config blocklist`
|
|
120
|
+
edits it (`4cl config blocklist none` clears it, behind a typed confirmation). The
|
|
121
|
+
persisted list is handed to the poller, which sets each board's `fetch_media`. Review
|
|
122
|
+
it for your jurisdiction before widening media. (`MEDIA_BLOCKLIST` env still overrides
|
|
123
|
+
per-run for advanced/manual use.)
|
|
124
|
+
|
|
125
|
+
## Install
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
pipx install git+https://github.com/Iljaadam/4chan-local # isolated CLI
|
|
129
|
+
# or, from a checkout, for hacking:
|
|
130
|
+
git clone https://github.com/Iljaadam/4chan-local && cd 4chan-local
|
|
131
|
+
pip install -e .
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Python ≥ 3.10, cross-platform (data lives under your OS data dir). A PyPI release
|
|
135
|
+
(`pipx install fourchan-local`) is also available.
|
|
136
|
+
|
|
137
|
+
## Contributing
|
|
138
|
+
|
|
139
|
+
Issues and PRs welcome — see [CONTRIBUTING.md](CONTRIBUTING.md) for dev setup and
|
|
140
|
+
scope. Please **do not** open PRs that weaken the media blocklist default or add a
|
|
141
|
+
path that downloads bytes from the blocked boards by default.
|
|
142
|
+
|
|
143
|
+
## Roadmap
|
|
144
|
+
|
|
145
|
+
Pivot plan, phased: [`docs/ROADMAP-local-tool.md`](docs/ROADMAP-local-tool.md).
|
|
146
|
+
Short version — P0 reframe → P1 SQLite port → P2 retention/GC → P3 pin UI →
|
|
147
|
+
P4 `4cl` CLI → P5 drop Docker/nginx → P6 pip package — **all done.**
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
[MIT](LICENSE) © Ilja Adamenko. Provided **as-is, without warranty**. Not affiliated
|
|
152
|
+
with, endorsed by, or connected to 4chan. Using it to download content is your
|
|
153
|
+
responsibility under your local law.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""fourchan-local: installable local-4chan mirror + browser.
|
|
2
|
+
|
|
3
|
+
Everything lives in this package: the `4cl` CLI (cli.py) supervises the poller
|
|
4
|
+
(poller.py), media worker (media.py), and web UI (app.py); db.py + retention.py
|
|
5
|
+
back them with a single SQLite file, and schema/templates/static ship as package
|
|
6
|
+
data so a plain wheel install runs with no source tree present.
|
|
7
|
+
"""
|
|
8
|
+
__version__ = "1.0.0"
|