pypdfc 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypdfc-0.1.0/PKG-INFO +321 -0
- pypdfc-0.1.0/README.md +299 -0
- pypdfc-0.1.0/pdfc/__init__.py +0 -0
- pypdfc-0.1.0/pdfc/__main__.py +3 -0
- pypdfc-0.1.0/pdfc/main.py +141 -0
- pypdfc-0.1.0/pypdfc.egg-info/PKG-INFO +321 -0
- pypdfc-0.1.0/pypdfc.egg-info/SOURCES.txt +11 -0
- pypdfc-0.1.0/pypdfc.egg-info/dependency_links.txt +1 -0
- pypdfc-0.1.0/pypdfc.egg-info/entry_points.txt +2 -0
- pypdfc-0.1.0/pypdfc.egg-info/requires.txt +12 -0
- pypdfc-0.1.0/pypdfc.egg-info/top_level.txt +1 -0
- pypdfc-0.1.0/pyproject.toml +54 -0
- pypdfc-0.1.0/setup.cfg +4 -0
pypdfc-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pypdfc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A command line tool for compressing PDF files
|
|
5
|
+
Author: Corvin Gröning
|
|
6
|
+
Keywords: pdf,compress,cli
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Environment :: Console
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: img2pdf>=0.5.0
|
|
12
|
+
Requires-Dist: pdf2image>=1.17.0
|
|
13
|
+
Requires-Dist: Pillow>=11.0.0
|
|
14
|
+
Requires-Dist: PyYAML>=6.0.0
|
|
15
|
+
Requires-Dist: questionary>=2.0.0
|
|
16
|
+
Requires-Dist: rich>=14.0.0
|
|
17
|
+
Requires-Dist: typer>=0.15.0
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=9.0; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-cov>=7.0; extra == "dev"
|
|
21
|
+
Requires-Dist: coverage>=7.0; extra == "dev"
|
|
22
|
+
|
|
23
|
+
# `pdfc` – PDF Compressor for the Command Line
|
|
24
|
+
|
|
25
|
+
A command-line tool for compressing and optimising PDF files using configurable rasterization, colour mode, sharpening and contrast settings.
|
|
26
|
+
|
|
27
|
+

|
|
28
|
+

|
|
29
|
+
[pypdfc on PyPI](https://pypi.org/project/pypdfc/)
|
|
30
|
+
|
|
31
|
+
<img src="https://raw.githubusercontent.com/cgroening/py-pdfc/main/images/logo_dark.png" width="200" alt="PDF Compressor Logo">
|
|
32
|
+
|
|
33
|
+
**Compressing a PDF file:**
|
|
34
|
+
|
|
35
|
+

|
|
36
|
+
|
|
37
|
+
**Interactive mode:**
|
|
38
|
+
|
|
39
|
+

|
|
40
|
+
|
|
41
|
+
**Comparing compression presets:**
|
|
42
|
+
|
|
43
|
+

|
|
44
|
+
|
|
45
|
+
## Requirements
|
|
46
|
+
|
|
47
|
+
- Python 3.11+
|
|
48
|
+
- [Poppler](https://poppler.freedesktop.org/) (for `pdf2image`)
|
|
49
|
+
- macOS: `brew install poppler`
|
|
50
|
+
- Ubuntu/Debian: `sudo apt install poppler-utils`
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```zsh
|
|
55
|
+
pip install .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
For development (includes pytest, coverage):
|
|
59
|
+
|
|
60
|
+
```zsh
|
|
61
|
+
pip install ".[dev]"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Commands
|
|
65
|
+
|
|
66
|
+
### `compress` – Compress one or more PDF files
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
pdfc compress [OPTIONS] INPUT_PATH [OUTPUT_PATH]
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
| Argument / Option | Short | Description |
|
|
73
|
+
|---|---|---|
|
|
74
|
+
| `--interactive` | `-i` | Collect all settings interactively via prompts |
|
|
75
|
+
| `--verbose` | `-v` | Verbose output |
|
|
76
|
+
| `--mode` | `-m` | Colour mode: `color`, `gray` or `bw` |
|
|
77
|
+
| `--dpi` | `-d` | Resolution for rasterization in dots per inch (default: 300) |
|
|
78
|
+
| `--jpeg-quality` | `-q` | JPEG quality 1–100 (default: 30). Mutually exclusive with `--png-compression-level` |
|
|
79
|
+
| `--png-compression-level` | `-p` | PNG compression level 0–9 (default: 6). Mutually exclusive with `--jpeg-quality` |
|
|
80
|
+
| `--threshold` | `-t` | B&W threshold 0–255 (default: 150). Only used in `bw` mode |
|
|
81
|
+
| `--sharpen` | `-s` | Sharpening factor 0.0–3.0 (default: 0.0 = off) |
|
|
82
|
+
| `--contrast` | `-c` | Contrast factor 0.0–3.0 (default: 1.0 = no change) |
|
|
83
|
+
| `--unsharp-mask` | `-u` | Apply PIL UnsharpMask filter |
|
|
84
|
+
| `--tiff-ccitt` | `-T` | Use TIFF CCITT Group 4 as intermediate format (`bw` mode only) |
|
|
85
|
+
| `--no-skip` | | Process all files, including files ending with `-compressed.pdf` (which are skipped by default when input is a directory) |
|
|
86
|
+
| `INPUT_PATH` | | PDF file or directory of PDF files to compress |
|
|
87
|
+
| `OUTPUT_PATH` | | Output file (single-file mode only). Defaults to `<input>-compressed.pdf` |
|
|
88
|
+
|
|
89
|
+
**Examples:**
|
|
90
|
+
|
|
91
|
+
```zsh
|
|
92
|
+
# Compress a single file to B&W at 300 DPI
|
|
93
|
+
pdfc compress input.pdf -m bw -d 300
|
|
94
|
+
|
|
95
|
+
# Compress with a custom output path
|
|
96
|
+
pdfc compress input.pdf output.pdf -m gray -d 200 -q 50
|
|
97
|
+
|
|
98
|
+
# Compress all PDFs in a folder using defaults (skips *-compressed.pdf files)
|
|
99
|
+
pdfc compress /path/to/folder
|
|
100
|
+
|
|
101
|
+
# Compress all PDFs in a folder, including already compressed files
|
|
102
|
+
pdfc compress /path/to/folder --no-skip
|
|
103
|
+
|
|
104
|
+
# Choose settings interactively
|
|
105
|
+
pdfc compress input.pdf -i
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### `compare` – Compare compression configurations side by side
|
|
109
|
+
|
|
110
|
+
Runs all presets defined in `~/.config/pdfc/presets.yaml` against one or more
|
|
111
|
+
PDF files and writes the results into a subdirectory named after each input file.
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
Fields:
|
|
115
|
+
|
|
116
|
+
| Field name | Type | Range | Default | Description |
|
|
117
|
+
| ----------------- | -------- | ----------------------- | --- | --------------------------------------------------------------------------------------------------------------------- |
|
|
118
|
+
| `name` | `string` | `[a-zA-Z0-9_-]+` | `-` | Required. Used as the output file name. |
|
|
119
|
+
| `mode` | `string` | `color`, `gray` or `bw` | `bw` | Color space of the output: Black & white (1-bit after threshold conversion), Grayscale (8-bit) or Color (RGB, 24-bit) |
|
|
120
|
+
| `dpi` | `int` | `> 0` | `300` | Resolution for rasterization. Strongly affects both quality and file size. |
|
|
121
|
+
| `threshold` | `int` | `0-255` | `150` | Threshold for B&W conversion. Pixels above the value → white, below → black. Higher value = more black; lower value = more white = smaller file. |
|
|
122
|
+
| `jpeg_quality` | `int` | `1-100` | `30` | JPEG quality. Lower values = smaller file, lower image quality. Enables JPEG mode. Cannot be combined with `png_compression`. |
|
|
123
|
+
| `png_compression` | `int` | `0-9` | `7` | PNG compression level. Higher values lead to smaller files and longer processing times. Enables PNG mode. Cannot be combined with `jpeg_quality`. |
|
|
124
|
+
| `sharpen` | `float` | `0.0-3.0` | `1.0` | Sharpening filter (PIL ImageEnhance.Sharpness). 0.0 → off, 1.0 → no change, >1.0 → sharper. |
|
|
125
|
+
| `contrast` | `float` | `0.0-3.0` | `1.0` | Contrast filter (PIL ImageEnhance.Contrast). 1.0 → no change, >1.0 → more contrast. |
|
|
126
|
+
| `unsharp_mask` | `bool` | `true`/`false` | `false` | PIL UnsharpMask filter (radius=2, percent=150, threshold=3). Sharpens edges before conversion. |
|
|
127
|
+
| `tiff_ccitt` | `bool` | `true`/`false` | `false` | Use TIFF CCITT Group 4 intermediate format |
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
pdfc compare [OPTIONS] INPUT_PATH
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
| Option | Short | Description |
|
|
135
|
+
|---|---|---|
|
|
136
|
+
| `--dpi` | `-d` | Resolution for rasterization (default: 300) |
|
|
137
|
+
| `--verbose` | `-v` | Verbose output |
|
|
138
|
+
|
|
139
|
+
**Examples:**
|
|
140
|
+
|
|
141
|
+
```zsh
|
|
142
|
+
# Compare all presets for a single file
|
|
143
|
+
pdfc compare input.pdf
|
|
144
|
+
|
|
145
|
+
# Compare all presets for all PDFs in a folder at 200 DPI
|
|
146
|
+
pdfc compare /path/to/folder --dpi 200
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Output is written to a subdirectory next to the input file:
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
input.pdf
|
|
153
|
+
input/
|
|
154
|
+
preset-name-1.pdf
|
|
155
|
+
preset-name-2.pdf
|
|
156
|
+
...
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Presets file
|
|
160
|
+
|
|
161
|
+
The `compare` command reads presets from `~/.config/pdfc/presets.yaml`.
|
|
162
|
+
Each preset defines a named compression configuration.
|
|
163
|
+
|
|
164
|
+
**Example:**
|
|
165
|
+
|
|
166
|
+
```yaml
|
|
167
|
+
presets:
|
|
168
|
+
- name: bw-300
|
|
169
|
+
mode: bw
|
|
170
|
+
dpi: 300
|
|
171
|
+
threshold: 150
|
|
172
|
+
sharpen: 1.5
|
|
173
|
+
contrast: 1.5
|
|
174
|
+
|
|
175
|
+
- name: gray-150
|
|
176
|
+
mode: gray
|
|
177
|
+
dpi: 150
|
|
178
|
+
jpeg_quality: 40
|
|
179
|
+
|
|
180
|
+
- name: color-200
|
|
181
|
+
mode: color
|
|
182
|
+
dpi: 200
|
|
183
|
+
jpeg_quality: 60
|
|
184
|
+
sharpen: 1.3
|
|
185
|
+
contrast: 1.3
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
See `example-presets.yaml` for more presets to try.
|
|
189
|
+
|
|
190
|
+
Available preset fields:
|
|
191
|
+
|
|
192
|
+
| Field | Type | Description |
|
|
193
|
+
|---|---|---|
|
|
194
|
+
| `name` | string | Required. Used as the output file name |
|
|
195
|
+
| `mode` | string | `color`, `gray` or `bw` |
|
|
196
|
+
| `dpi` | int | Resolution for rasterization |
|
|
197
|
+
| `threshold` | int | B&W threshold 0–255 |
|
|
198
|
+
| `jpeg_quality` | int | JPEG quality 1–100 |
|
|
199
|
+
| `png_compression` | int | PNG compression level 0–9 |
|
|
200
|
+
| `sharpen` | float | Sharpening factor 0.0–3.0 |
|
|
201
|
+
| `contrast` | float | Contrast factor 0.0–3.0 |
|
|
202
|
+
| `unsharp_mask` | bool | Apply PIL UnsharpMask filter |
|
|
203
|
+
| `tiff_ccitt` | bool | Use TIFF CCITT Group 4 intermediate format |
|
|
204
|
+
|
|
205
|
+
## Compression modes
|
|
206
|
+
|
|
207
|
+
| Mode | Description |
|
|
208
|
+
|---|---|
|
|
209
|
+
| `color` | Keeps full colour. Best for photos and colour diagrams |
|
|
210
|
+
| `gray` | Converts to greyscale. Good balance of size and readability |
|
|
211
|
+
| `bw` | Converts to black & white. Smallest file size, best for text-only scans |
|
|
212
|
+
|
|
213
|
+
## Parameter reference
|
|
214
|
+
|
|
215
|
+
### Sharpen (`-s` / `--sharpen`)
|
|
216
|
+
|
|
217
|
+
Range: 0.0 to 3.0 (float)
|
|
218
|
+
|
|
219
|
+
| Value | Effect | Use case |
|
|
220
|
+
|---|---|---|
|
|
221
|
+
| **0.0** | No sharpening (off) | Default |
|
|
222
|
+
| **0.5** | Slight blur | Noise reduction |
|
|
223
|
+
| **1.0** | Original (no change) | Baseline |
|
|
224
|
+
| **1.2–1.5** | Light sharpening | Recommended for clean documents |
|
|
225
|
+
| **1.5–2.0** | Medium sharpening | Good for text |
|
|
226
|
+
| **2.0–2.5** | Strong sharpening | For blurry scans |
|
|
227
|
+
| **2.5–3.0** | Very strong sharpening | Risk of artefacts |
|
|
228
|
+
| **>3.0** | Extreme (not allowed) | Too much; artefacts likely |
|
|
229
|
+
|
|
230
|
+
Visual effect:
|
|
231
|
+
|
|
232
|
+
```
|
|
233
|
+
sharpen = 0.5: T e x t (blurry)
|
|
234
|
+
sharpen = 1.0: Text (original)
|
|
235
|
+
sharpen = 1.5: Text (crisper)
|
|
236
|
+
sharpen = 2.0: Text (very sharp)
|
|
237
|
+
sharpen = 3.0: Text (over-sharpened, halo artefacts)
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
**Too much sharpening (> 3.0):**
|
|
241
|
+
- Halos around letters (white/black fringes)
|
|
242
|
+
- Noise amplification (grainy look)
|
|
243
|
+
- Edge artefacts
|
|
244
|
+
- Unnatural appearance
|
|
245
|
+
|
|
246
|
+
### Contrast (`-c` / `--contrast`)
|
|
247
|
+
|
|
248
|
+
Range: 0.0 to 3.0 (float)
|
|
249
|
+
|
|
250
|
+
| Value | Effect | Use case |
|
|
251
|
+
|---|---|---|
|
|
252
|
+
| **0.0** | Flat grey (no contrast) | Not useful |
|
|
253
|
+
| **0.5** | Reduced contrast | Softer images |
|
|
254
|
+
| **1.0** | Original (no change) | Baseline |
|
|
255
|
+
| **1.2–1.5** | Slightly increased contrast | Recommended for clean documents |
|
|
256
|
+
| **1.5–2.0** | Medium contrast | Good for text, clear separation |
|
|
257
|
+
| **2.0–2.5** | Strong contrast | For faded documents |
|
|
258
|
+
| **2.5–3.0** | Very strong contrast | Risk of detail loss |
|
|
259
|
+
| **>3.0** | Extreme (not allowed) | Near binary; details lost |
|
|
260
|
+
|
|
261
|
+
Visual effect:
|
|
262
|
+
|
|
263
|
+
```
|
|
264
|
+
contrast = 0.5: Text (washed out, grey)
|
|
265
|
+
contrast = 1.0: Text (original)
|
|
266
|
+
contrast = 1.5: Text (crisper, more defined)
|
|
267
|
+
contrast = 2.0: Text (very clear, strong separation)
|
|
268
|
+
contrast = 3.0: Text (near binary)
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
**Too much contrast (> 3.0):**
|
|
272
|
+
- Detail loss (everything becomes black or white)
|
|
273
|
+
- No more grey tones
|
|
274
|
+
- Hard edges without transitions
|
|
275
|
+
- Information loss
|
|
276
|
+
|
|
277
|
+
### Interaction between contrast and threshold
|
|
278
|
+
|
|
279
|
+
Contrast is applied before the B&W threshold. A higher contrast value effectively
|
|
280
|
+
makes the threshold more aggressive, since pixel values are pushed further apart
|
|
281
|
+
before the cut-off is applied.
|
|
282
|
+
|
|
283
|
+
| Scenario | contrast | threshold | Pixel at 140 | Pixel at 160 |
|
|
284
|
+
|---|---|---|---|---|
|
|
285
|
+
| Low contrast | 1.0 | 150 | stays ~140 → black | stays ~160 → white |
|
|
286
|
+
| High contrast | 2.0 | 150 | pushed to ~100 → black | pushed to ~200 → white |
|
|
287
|
+
|
|
288
|
+
**Rule of thumb:** increase contrast only as much as needed; combine with the
|
|
289
|
+
threshold to control where the black/white boundary falls.
|
|
290
|
+
|
|
291
|
+
### Recommended combinations
|
|
292
|
+
|
|
293
|
+
| Document type | sharpen | contrast | Notes |
|
|
294
|
+
|---|---|---|---|
|
|
295
|
+
| Clean, digital documents | 1.3 | 1.3 | Subtle improvement, no risk |
|
|
296
|
+
| Standard scans | 1.5 | 1.5 | Best balance |
|
|
297
|
+
| Blurry or faded scans | 2.0 | 2.0 | Strong improvement, low artefact risk |
|
|
298
|
+
| Very poor scans | 2.5 | 2.5 | Maximum recommended |
|
|
299
|
+
| Last resort | 3.0 | 3.0 | Artefacts likely |
|
|
300
|
+
|
|
301
|
+
### Recommended parameter ranges (summary)
|
|
302
|
+
|
|
303
|
+
| Parameter | Minimum | Recommended | Safe maximum | Risky maximum |
|
|
304
|
+
|---|---|---|---|---|
|
|
305
|
+
| Sharpen | 0.0 (off) | 1.3–2.0 | 2.5 | 3.0 |
|
|
306
|
+
| Contrast | 0.0 (grey) | 1.3–2.0 | 2.5 | 3.0 |
|
|
307
|
+
|
|
308
|
+
## Planned features
|
|
309
|
+
|
|
310
|
+
### v0.1 – Initial release
|
|
311
|
+
|
|
312
|
+
- [x] Skip files with the suffix `-compressed.pdf` to avoid re-processing already compressed files, unless flag `--no-skip` is set
|
|
313
|
+
- [ ] Skip presets for color mode if input file is B&W or grayscale
|
|
314
|
+
- [ ] Improve/clean output messages when processing files and presets
|
|
315
|
+
|
|
316
|
+
### v0.2 – Presets management
|
|
317
|
+
|
|
318
|
+
- [ ] `presets.yaml`: Field to set one preset as default for `compress` command, warn if missing or multiple defaults
|
|
319
|
+
- [ ] Add `--preset` option to `compress` command to specify a preset from the presets file, e.g. `pdfc compress input.pdf --preset bw-300`
|
|
320
|
+
- [ ] Add command `list-presets` which lets the user interactively view, add, edit and delete presets in the presets file
|
|
321
|
+
- [ ] Add command `preset` which lets the user select a preset from the presets file and applies it to one or more PDF files
|
pypdfc-0.1.0/README.md
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# `pdfc` – PDF Compressor for the Command Line
|
|
2
|
+
|
|
3
|
+
A command-line tool for compressing and optimising PDF files using configurable rasterization, colour mode, sharpening and contrast settings.
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+

|
|
7
|
+
[pypdfc on PyPI](https://pypi.org/project/pypdfc/)
|
|
8
|
+
|
|
9
|
+
<img src="https://raw.githubusercontent.com/cgroening/py-pdfc/main/images/logo_dark.png" width="200" alt="PDF Compressor Logo">
|
|
10
|
+
|
|
11
|
+
**Compressing a PDF file:**
|
|
12
|
+
|
|
13
|
+

|
|
14
|
+
|
|
15
|
+
**Interactive mode:**
|
|
16
|
+
|
|
17
|
+

|
|
18
|
+
|
|
19
|
+
**Comparing compression presets:**
|
|
20
|
+
|
|
21
|
+

|
|
22
|
+
|
|
23
|
+
## Requirements
|
|
24
|
+
|
|
25
|
+
- Python 3.11+
|
|
26
|
+
- [Poppler](https://poppler.freedesktop.org/) (for `pdf2image`)
|
|
27
|
+
- macOS: `brew install poppler`
|
|
28
|
+
- Ubuntu/Debian: `sudo apt install poppler-utils`
|
|
29
|
+
|
|
30
|
+
## Installation
|
|
31
|
+
|
|
32
|
+
```zsh
|
|
33
|
+
pip install .
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
For development (includes pytest, coverage):
|
|
37
|
+
|
|
38
|
+
```zsh
|
|
39
|
+
pip install ".[dev]"
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Commands
|
|
43
|
+
|
|
44
|
+
### `compress` – Compress one or more PDF files
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
pdfc compress [OPTIONS] INPUT_PATH [OUTPUT_PATH]
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
| Argument / Option | Short | Description |
|
|
51
|
+
|---|---|---|
|
|
52
|
+
| `--interactive` | `-i` | Collect all settings interactively via prompts |
|
|
53
|
+
| `--verbose` | `-v` | Verbose output |
|
|
54
|
+
| `--mode` | `-m` | Colour mode: `color`, `gray` or `bw` |
|
|
55
|
+
| `--dpi` | `-d` | Resolution for rasterization in dots per inch (default: 300) |
|
|
56
|
+
| `--jpeg-quality` | `-q` | JPEG quality 1–100 (default: 30). Mutually exclusive with `--png-compression-level` |
|
|
57
|
+
| `--png-compression-level` | `-p` | PNG compression level 0–9 (default: 6). Mutually exclusive with `--jpeg-quality` |
|
|
58
|
+
| `--threshold` | `-t` | B&W threshold 0–255 (default: 150). Only used in `bw` mode |
|
|
59
|
+
| `--sharpen` | `-s` | Sharpening factor 0.0–3.0 (default: 0.0 = off) |
|
|
60
|
+
| `--contrast` | `-c` | Contrast factor 0.0–3.0 (default: 1.0 = no change) |
|
|
61
|
+
| `--unsharp-mask` | `-u` | Apply PIL UnsharpMask filter |
|
|
62
|
+
| `--tiff-ccitt` | `-T` | Use TIFF CCITT Group 4 as intermediate format (`bw` mode only) |
|
|
63
|
+
| `--no-skip` | | Process all files, including files ending with `-compressed.pdf` (which are skipped by default when input is a directory) |
|
|
64
|
+
| `INPUT_PATH` | | PDF file or directory of PDF files to compress |
|
|
65
|
+
| `OUTPUT_PATH` | | Output file (single-file mode only). Defaults to `<input>-compressed.pdf` |
|
|
66
|
+
|
|
67
|
+
**Examples:**
|
|
68
|
+
|
|
69
|
+
```zsh
|
|
70
|
+
# Compress a single file to B&W at 300 DPI
|
|
71
|
+
pdfc compress input.pdf -m bw -d 300
|
|
72
|
+
|
|
73
|
+
# Compress with a custom output path
|
|
74
|
+
pdfc compress input.pdf output.pdf -m gray -d 200 -q 50
|
|
75
|
+
|
|
76
|
+
# Compress all PDFs in a folder using defaults (skips *-compressed.pdf files)
|
|
77
|
+
pdfc compress /path/to/folder
|
|
78
|
+
|
|
79
|
+
# Compress all PDFs in a folder, including already compressed files
|
|
80
|
+
pdfc compress /path/to/folder --no-skip
|
|
81
|
+
|
|
82
|
+
# Choose settings interactively
|
|
83
|
+
pdfc compress input.pdf -i
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### `compare` – Compare compression configurations side by side
|
|
87
|
+
|
|
88
|
+
Runs all presets defined in `~/.config/pdfc/presets.yaml` against one or more
|
|
89
|
+
PDF files and writes the results into a subdirectory named after each input file.
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
Fields:
|
|
93
|
+
|
|
94
|
+
| Field name | Type | Range | Default | Description |
|
|
95
|
+
| ----------------- | -------- | ----------------------- | --- | --------------------------------------------------------------------------------------------------------------------- |
|
|
96
|
+
| `name` | `string` | `[a-zA-Z0-9_-]+` | `-` | Required. Used as the output file name. |
|
|
97
|
+
| `mode` | `string` | `color`, `gray` or `bw` | `bw` | Color space of the output: Black & white (1-bit after threshold conversion), Grayscale (8-bit) or Color (RGB, 24-bit) |
|
|
98
|
+
| `dpi` | `int` | `> 0` | `300` | Resolution for rasterization. Strongly affects both quality and file size. |
|
|
99
|
+
| `threshold` | `int` | `0-255` | `150` | Threshold for B&W conversion. Pixels above the value → white, below → black. Higher value = more black; lower value = more white = smaller file. |
|
|
100
|
+
| `jpeg_quality` | `int` | `1-100` | `30` | JPEG quality. Lower values = smaller file, lower image quality. Enables JPEG mode. Cannot be combined with `png_compression`. |
|
|
101
|
+
| `png_compression` | `int` | `0-9` | `7` | PNG compression level. Higher values lead to smaller files and longer processing times. Enables PNG mode. Cannot be combined with `jpeg_quality`. |
|
|
102
|
+
| `sharpen` | `float` | `0.0-3.0` | `1.0` | Sharpening filter (PIL ImageEnhance.Sharpness). 0.0 → off, 1.0 → no change, >1.0 → sharper. |
|
|
103
|
+
| `contrast` | `float` | `0.0-3.0` | `1.0` | Contrast filter (PIL ImageEnhance.Contrast). 1.0 → no change, >1.0 → more contrast. |
|
|
104
|
+
| `unsharp_mask` | `bool` | `true`/`false` | `false` | PIL UnsharpMask filter (radius=2, percent=150, threshold=3). Sharpens edges before conversion. |
|
|
105
|
+
| `tiff_ccitt` | `bool` | `true`/`false` | `false` | Use TIFF CCITT Group 4 intermediate format |
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
pdfc compare [OPTIONS] INPUT_PATH
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
| Option | Short | Description |
|
|
113
|
+
|---|---|---|
|
|
114
|
+
| `--dpi` | `-d` | Resolution for rasterization (default: 300) |
|
|
115
|
+
| `--verbose` | `-v` | Verbose output |
|
|
116
|
+
|
|
117
|
+
**Examples:**
|
|
118
|
+
|
|
119
|
+
```zsh
|
|
120
|
+
# Compare all presets for a single file
|
|
121
|
+
pdfc compare input.pdf
|
|
122
|
+
|
|
123
|
+
# Compare all presets for all PDFs in a folder at 200 DPI
|
|
124
|
+
pdfc compare /path/to/folder --dpi 200
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Output is written to a subdirectory next to the input file:
|
|
128
|
+
|
|
129
|
+
```
|
|
130
|
+
input.pdf
|
|
131
|
+
input/
|
|
132
|
+
preset-name-1.pdf
|
|
133
|
+
preset-name-2.pdf
|
|
134
|
+
...
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Presets file
|
|
138
|
+
|
|
139
|
+
The `compare` command reads presets from `~/.config/pdfc/presets.yaml`.
|
|
140
|
+
Each preset defines a named compression configuration.
|
|
141
|
+
|
|
142
|
+
**Example:**
|
|
143
|
+
|
|
144
|
+
```yaml
|
|
145
|
+
presets:
|
|
146
|
+
- name: bw-300
|
|
147
|
+
mode: bw
|
|
148
|
+
dpi: 300
|
|
149
|
+
threshold: 150
|
|
150
|
+
sharpen: 1.5
|
|
151
|
+
contrast: 1.5
|
|
152
|
+
|
|
153
|
+
- name: gray-150
|
|
154
|
+
mode: gray
|
|
155
|
+
dpi: 150
|
|
156
|
+
jpeg_quality: 40
|
|
157
|
+
|
|
158
|
+
- name: color-200
|
|
159
|
+
mode: color
|
|
160
|
+
dpi: 200
|
|
161
|
+
jpeg_quality: 60
|
|
162
|
+
sharpen: 1.3
|
|
163
|
+
contrast: 1.3
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
See `example-presets.yaml` for more presets to try.
|
|
167
|
+
|
|
168
|
+
Available preset fields:
|
|
169
|
+
|
|
170
|
+
| Field | Type | Description |
|
|
171
|
+
|---|---|---|
|
|
172
|
+
| `name` | string | Required. Used as the output file name |
|
|
173
|
+
| `mode` | string | `color`, `gray` or `bw` |
|
|
174
|
+
| `dpi` | int | Resolution for rasterization |
|
|
175
|
+
| `threshold` | int | B&W threshold 0–255 |
|
|
176
|
+
| `jpeg_quality` | int | JPEG quality 1–100 |
|
|
177
|
+
| `png_compression` | int | PNG compression level 0–9 |
|
|
178
|
+
| `sharpen` | float | Sharpening factor 0.0–3.0 |
|
|
179
|
+
| `contrast` | float | Contrast factor 0.0–3.0 |
|
|
180
|
+
| `unsharp_mask` | bool | Apply PIL UnsharpMask filter |
|
|
181
|
+
| `tiff_ccitt` | bool | Use TIFF CCITT Group 4 intermediate format |
|
|
182
|
+
|
|
183
|
+
## Compression modes
|
|
184
|
+
|
|
185
|
+
| Mode | Description |
|
|
186
|
+
|---|---|
|
|
187
|
+
| `color` | Keeps full colour. Best for photos and colour diagrams |
|
|
188
|
+
| `gray` | Converts to greyscale. Good balance of size and readability |
|
|
189
|
+
| `bw` | Converts to black & white. Smallest file size, best for text-only scans |
|
|
190
|
+
|
|
191
|
+
## Parameter reference
|
|
192
|
+
|
|
193
|
+
### Sharpen (`-s` / `--sharpen`)
|
|
194
|
+
|
|
195
|
+
Range: 0.0 to 3.0 (float)
|
|
196
|
+
|
|
197
|
+
| Value | Effect | Use case |
|
|
198
|
+
|---|---|---|
|
|
199
|
+
| **0.0** | No sharpening (off) | Default |
|
|
200
|
+
| **0.5** | Slight blur | Noise reduction |
|
|
201
|
+
| **1.0** | Original (no change) | Baseline |
|
|
202
|
+
| **1.2–1.5** | Light sharpening | Recommended for clean documents |
|
|
203
|
+
| **1.5–2.0** | Medium sharpening | Good for text |
|
|
204
|
+
| **2.0–2.5** | Strong sharpening | For blurry scans |
|
|
205
|
+
| **2.5–3.0** | Very strong sharpening | Risk of artefacts |
|
|
206
|
+
| **>3.0** | Extreme (not allowed) | Too much; artefacts likely |
|
|
207
|
+
|
|
208
|
+
Visual effect:
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
sharpen = 0.5: T e x t (blurry)
|
|
212
|
+
sharpen = 1.0: Text (original)
|
|
213
|
+
sharpen = 1.5: Text (crisper)
|
|
214
|
+
sharpen = 2.0: Text (very sharp)
|
|
215
|
+
sharpen = 3.0: Text (over-sharpened, halo artefacts)
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
**Too much sharpening (> 3.0):**
|
|
219
|
+
- Halos around letters (white/black fringes)
|
|
220
|
+
- Noise amplification (grainy look)
|
|
221
|
+
- Edge artefacts
|
|
222
|
+
- Unnatural appearance
|
|
223
|
+
|
|
224
|
+
### Contrast (`-c` / `--contrast`)
|
|
225
|
+
|
|
226
|
+
Range: 0.0 to 3.0 (float)
|
|
227
|
+
|
|
228
|
+
| Value | Effect | Use case |
|
|
229
|
+
|---|---|---|
|
|
230
|
+
| **0.0** | Flat grey (no contrast) | Not useful |
|
|
231
|
+
| **0.5** | Reduced contrast | Softer images |
|
|
232
|
+
| **1.0** | Original (no change) | Baseline |
|
|
233
|
+
| **1.2–1.5** | Slightly increased contrast | Recommended for clean documents |
|
|
234
|
+
| **1.5–2.0** | Medium contrast | Good for text, clear separation |
|
|
235
|
+
| **2.0–2.5** | Strong contrast | For faded documents |
|
|
236
|
+
| **2.5–3.0** | Very strong contrast | Risk of detail loss |
|
|
237
|
+
| **>3.0** | Extreme (not allowed) | Near binary; details lost |
|
|
238
|
+
|
|
239
|
+
Visual effect:
|
|
240
|
+
|
|
241
|
+
```
|
|
242
|
+
contrast = 0.5: Text (washed out, grey)
|
|
243
|
+
contrast = 1.0: Text (original)
|
|
244
|
+
contrast = 1.5: Text (crisper, more defined)
|
|
245
|
+
contrast = 2.0: Text (very clear, strong separation)
|
|
246
|
+
contrast = 3.0: Text (near binary)
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
**Too much contrast (> 3.0):**
|
|
250
|
+
- Detail loss (everything becomes black or white)
|
|
251
|
+
- No more grey tones
|
|
252
|
+
- Hard edges without transitions
|
|
253
|
+
- Information loss
|
|
254
|
+
|
|
255
|
+
### Interaction between contrast and threshold
|
|
256
|
+
|
|
257
|
+
Contrast is applied before the B&W threshold. A higher contrast value effectively
|
|
258
|
+
makes the threshold more aggressive, since pixel values are pushed further apart
|
|
259
|
+
before the cut-off is applied.
|
|
260
|
+
|
|
261
|
+
| Scenario | contrast | threshold | Pixel at 140 | Pixel at 160 |
|
|
262
|
+
|---|---|---|---|---|
|
|
263
|
+
| Low contrast | 1.0 | 150 | stays ~140 → black | stays ~160 → white |
|
|
264
|
+
| High contrast | 2.0 | 150 | pushed to ~100 → black | pushed to ~200 → white |
|
|
265
|
+
|
|
266
|
+
**Rule of thumb:** increase contrast only as much as needed; combine with the
|
|
267
|
+
threshold to control where the black/white boundary falls.
|
|
268
|
+
|
|
269
|
+
### Recommended combinations
|
|
270
|
+
|
|
271
|
+
| Document type | sharpen | contrast | Notes |
|
|
272
|
+
|---|---|---|---|
|
|
273
|
+
| Clean, digital documents | 1.3 | 1.3 | Subtle improvement, no risk |
|
|
274
|
+
| Standard scans | 1.5 | 1.5 | Best balance |
|
|
275
|
+
| Blurry or faded scans | 2.0 | 2.0 | Strong improvement, low artefact risk |
|
|
276
|
+
| Very poor scans | 2.5 | 2.5 | Maximum recommended |
|
|
277
|
+
| Last resort | 3.0 | 3.0 | Artefacts likely |
|
|
278
|
+
|
|
279
|
+
### Recommended parameter ranges (summary)
|
|
280
|
+
|
|
281
|
+
| Parameter | Minimum | Recommended | Safe maximum | Risky maximum |
|
|
282
|
+
|---|---|---|---|---|
|
|
283
|
+
| Sharpen | 0.0 (off) | 1.3–2.0 | 2.5 | 3.0 |
|
|
284
|
+
| Contrast | 0.0 (grey) | 1.3–2.0 | 2.5 | 3.0 |
|
|
285
|
+
|
|
286
|
+
## Planned features
|
|
287
|
+
|
|
288
|
+
### v0.1 – Initial release
|
|
289
|
+
|
|
290
|
+
- [x] Skip files with the suffix `-compressed.pdf` to avoid re-processing already compressed files, unless flag `--no-skip` is set
|
|
291
|
+
- [ ] Skip presets for color mode if input file is B&W or grayscale
|
|
292
|
+
- [ ] Improve/clean output messages when processing files and presets
|
|
293
|
+
|
|
294
|
+
### v0.2 – Presets management
|
|
295
|
+
|
|
296
|
+
- [ ] `presets.yaml`: Field to set one preset as default for `compress` command, warn if missing or multiple defaults
|
|
297
|
+
- [ ] Add `--preset` option to `compress` command to specify a preset from the presets file, e.g. `pdfc compress input.pdf --preset bw-300`
|
|
298
|
+
- [ ] Add command `list-presets` which lets the user interactively view, add, edit and delete presets in the presets file
|
|
299
|
+
- [ ] Add command `preset` which lets the user select a preset from the presets file and applies it to one or more PDF files
|
|
File without changes
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from pdfc.cli.compress_parameters import CompressRequest
|
|
6
|
+
from pdfc.cli.commands.compress_input import InputView
|
|
7
|
+
from pdfc.cli.commands.compress import CompressCommand
|
|
8
|
+
from pdfc.cli.commands.compare import CompareCommand
|
|
9
|
+
from pdfc.domain.models import CompressionSettings
|
|
10
|
+
from pdfc.services.compression import CompressionService
|
|
11
|
+
from pdfc.storage.pdf_compressor import PdfCompressor
|
|
12
|
+
from pdfc.storage.presets_storage import PresetsStorage
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
app = typer.Typer(  # root Typer application; the @app.command() functions in this module register on it
|
|
16
|
+
help='Compress and optimise PDF files using various algorithms.',
|
|
17
|
+
no_args_is_help=True,  # NOTE(review): per Typer docs this prints the help text when no arguments are given - confirm
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Dependency composition (Composition Root): the collaborators below are created once, at import time, and wired together through their constructors.
|
|
21
|
+
_compressor = PdfCompressor()
|
|
22
|
+
_presets = PresetsStorage()
|
|
23
|
+
_service = CompressionService(_compressor, _presets)  # the service receives the compressor and the presets storage
|
|
24
|
+
_input_view = InputView()
|
|
25
|
+
_compress_cmd = CompressCommand(_service, _input_view)  # the compress command gets the service plus the input view
|
|
26
|
+
_compare_cmd = CompareCommand(_service,)  # the compare command is built from the service alone
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@app.command()
def compress(
    input_path: Path = typer.Argument(
        ...,
        help='PDF file or directory of PDF files to compress.',
        exists=True,
        file_okay=True,
        dir_okay=True,
        resolve_path=True,
    ),
    output_path: Optional[Path] = typer.Argument(
        None,
        help=(
            'Output file path (single-file mode only). '
            'Defaults to <input>-compressed.pdf.'
        ),
    ),
    interactive_mode: bool = typer.Option(
        False, '-i', '--interactive',
        help='Collect compression settings interactively.',
    ),
    mode: Optional[str] = typer.Option(
        None, '-m', '--mode',
        help='Compression mode: color | gray | bw.',
    ),
    dpi: Optional[int] = typer.Option(
        None, '-d', '--dpi',
        min=1,
        help='Resolution for rasterisation (dots per inch).',
    ),
    jpeg_quality: Optional[int] = typer.Option(
        None, '-q', '--jpeg-quality',
        min=1,
        max=100,
        help='JPEG quality 1–100. Mutually exclusive with --png-compression-level.',
    ),
    png_compression_level: Optional[int] = typer.Option(
        None, '-p', '--png-compression-level',
        min=0,
        max=9,
        help='PNG compression level 0–9. Mutually exclusive with --jpeg-quality.',
    ),
    threshold: Optional[int] = typer.Option(
        None, '-t', '--threshold',
        min=0,
        max=255,
        help='B&W threshold 0–255 (only used in bw mode).',
    ),
    sharpen: Optional[float] = typer.Option(
        None, '-s', '--sharpen',
        min=0.0,
        max=3.0,
        help='Sharpening factor 0.0–3.0 (0 = off).',
    ),
    contrast: Optional[float] = typer.Option(
        None, '-c', '--contrast',
        min=0.0,
        max=3.0,
        help='Contrast factor 0.0–3.0 (1.0 = no change).',
    ),
    unsharp_mask: bool = typer.Option(
        False, '-u', '--unsharp-mask',
        help='Apply PIL UnsharpMask filter.',
    ),
    tiff_ccitt: bool = typer.Option(
        False, '-T', '--tiff-ccitt',
        help='Use TIFF CCITT Group 4 as intermediate format (bw mode only).',
    ),
    no_skip: bool = typer.Option(
        False, '--no-skip',
        help=(
            'Process all files in the directory, including files ending with '
            '"-compressed.pdf" (which are skipped by default).'
        ),
    ),
) -> None:
    """Compress a PDF file or all PDF files in a directory."""
    # NOTE: Typer uses the docstring above as the command's help text, so it
    # is kept to a single user-facing sentence; maintainer notes live in
    # comments instead.
    #
    # The numeric options carry `min`/`max` bounds that mirror the ranges
    # stated in their help texts, so out-of-range values are rejected while
    # the command line is parsed. Options that are not given stay `None` and
    # are forwarded unchanged (presumably defaults are resolved further down
    # the call chain - confirm in CompressionSettings / CompressCommand).
    #
    # Raises typer.BadParameter when both --jpeg-quality and
    # --png-compression-level are supplied.
    if jpeg_quality is not None and png_compression_level is not None:
        raise typer.BadParameter(
            'Cannot use --jpeg-quality and --png-compression-level at the same time.'
        )

    compression_settings = CompressionSettings(
        _mode=mode,
        _dpi=dpi,
        _jpeg_quality=jpeg_quality,
        _png_compression=png_compression_level,
        _bw_threshold=threshold,
        _sharpen=sharpen,
        _contrast=contrast,
        _unsharp_mask=unsharp_mask,
        _tiff_ccitt=tiff_ccitt,
    )
    compress_request = CompressRequest(
        interactive_mode=interactive_mode,
        input_path=input_path,
        output_path=output_path,
        compression_settings=compression_settings,
        no_skip=no_skip,
    )

    # All actual work happens in the command object built at module level.
    _compress_cmd.run(compress_request)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@app.command()
def compare(
    input_path: Path = typer.Argument(
        ...,
        help='PDF file or directory of PDF files to compare.',
        exists=True,
        file_okay=True,
        dir_okay=True,
        resolve_path=True,
    ),
    dpi: int = typer.Option(
        300, '-d', '--dpi',
        min=1,
        help='Resolution for rasterization (dots per inch). Default: 300.',
    ),
) -> None:
    """Compare compression presets for a PDF file or a directory of PDF files."""
    # NOTE: Typer uses the docstring above as the command's help text, so it
    # is kept to a single user-facing sentence.
    #
    # `min=1` rejects zero or negative resolutions while the command line is
    # parsed instead of passing them on to the rasteriser.
    _compare_cmd.run(input_path=input_path, dpi=dpi)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def main() -> None:
    """Console-script entry point: hand control to the Typer application."""
    app()
|
|
141
|
+
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pypdfc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A command line tool for compressing PDF files
|
|
5
|
+
Author: Corvin Gröning
|
|
6
|
+
Keywords: pdf,compress,cli
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Environment :: Console
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: img2pdf>=0.5.0
|
|
12
|
+
Requires-Dist: pdf2image>=1.17.0
|
|
13
|
+
Requires-Dist: Pillow>=11.0.0
|
|
14
|
+
Requires-Dist: PyYAML>=6.0.0
|
|
15
|
+
Requires-Dist: questionary>=2.0.0
|
|
16
|
+
Requires-Dist: rich>=14.0.0
|
|
17
|
+
Requires-Dist: typer>=0.15.0
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=9.0; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-cov>=7.0; extra == "dev"
|
|
21
|
+
Requires-Dist: coverage>=7.0; extra == "dev"
|
|
22
|
+
|
|
23
|
+
# `pdfc` – PDF Compressor for the Command Line
|
|
24
|
+
|
|
25
|
+
A command-line tool for compressing and optimising PDF files using configurable rasterization, colour mode, sharpening and contrast settings.
|
|
26
|
+
|
|
27
|
+

|
|
28
|
+

|
|
29
|
+
[](https://pypi.org/project/pdfc/)
|
|
30
|
+
|
|
31
|
+
<img src="https://raw.githubusercontent.com/cgroening/py-pdfc/main/images/logo_dark.png" width="200" alt="PDF Compressor Logo">
|
|
32
|
+
|
|
33
|
+
**Compressing a PDF file:**
|
|
34
|
+
|
|
35
|
+

|
|
36
|
+
|
|
37
|
+
**Interactive mode:**
|
|
38
|
+
|
|
39
|
+

|
|
40
|
+
|
|
41
|
+
**Comparing compression presets:**
|
|
42
|
+
|
|
43
|
+

|
|
44
|
+
|
|
45
|
+
## Requirements
|
|
46
|
+
|
|
47
|
+
- Python 3.11+
|
|
48
|
+
- [Poppler](https://poppler.freedesktop.org/) (for `pdf2image`)
|
|
49
|
+
- macOS: `brew install poppler`
|
|
50
|
+
- Ubuntu/Debian: `sudo apt install poppler-utils`
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```zsh
|
|
55
|
+
pip install .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
For development (includes pytest, coverage):
|
|
59
|
+
|
|
60
|
+
```zsh
|
|
61
|
+
pip install ".[dev]"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Commands
|
|
65
|
+
|
|
66
|
+
### `compress` – Compress one or more PDF files
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
pdfc compress [OPTIONS] INPUT_PATH [OUTPUT_PATH]
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
| Argument / Option | Short | Description |
|
|
73
|
+
|---|---|---|
|
|
74
|
+
| `--interactive` | `-i` | Collect all settings interactively via prompts |
|
|
75
|
+
| `--verbose` | `-v` | Verbose output (documented here, but not defined by the `compress` command in v0.1.0) |
|
|
76
|
+
| `--mode` | `-m` | Colour mode: `color`, `gray` or `bw` |
|
|
77
|
+
| `--dpi` | `-d` | Resolution for rasterization in dots per inch (default: 300) |
|
|
78
|
+
| `--jpeg-quality` | `-q` | JPEG quality 1–100 (default: 30). Mutually exclusive with `--png-compression-level` |
|
|
79
|
+
| `--png-compression-level` | `-p` | PNG compression level 0–9 (default: 6). Mutually exclusive with `--jpeg-quality` |
|
|
80
|
+
| `--threshold` | `-t` | B&W threshold 0–255 (default: 150). Only used in `bw` mode |
|
|
81
|
+
| `--sharpen` | `-s` | Sharpening factor 0.0–3.0 (default: 0.0 = off) |
|
|
82
|
+
| `--contrast` | `-c` | Contrast factor 0.0–3.0 (default: 1.0 = no change) |
|
|
83
|
+
| `--unsharp-mask` | `-u` | Apply PIL UnsharpMask filter |
|
|
84
|
+
| `--tiff-ccitt` | `-T` | Use TIFF CCITT Group 4 as intermediate format (`bw` mode only) |
|
|
85
|
+
| `--no-skip` | | Process all files, including files ending with `-compressed.pdf` (which are skipped by default when input is a directory) |
|
|
86
|
+
| `INPUT_PATH` | | PDF file or directory of PDF files to compress |
|
|
87
|
+
| `OUTPUT_PATH` | | Output file (single-file mode only). Defaults to `<input>-compressed.pdf` |
|
|
88
|
+
|
|
89
|
+
**Examples:**
|
|
90
|
+
|
|
91
|
+
```zsh
|
|
92
|
+
# Compress a single file to B&W at 300 DPI
|
|
93
|
+
pdfc compress input.pdf -m bw -d 300
|
|
94
|
+
|
|
95
|
+
# Compress with a custom output path
|
|
96
|
+
pdfc compress input.pdf output.pdf -m gray -d 200 -q 50
|
|
97
|
+
|
|
98
|
+
# Compress all PDFs in a folder using defaults (skips *-compressed.pdf files)
|
|
99
|
+
pdfc compress /path/to/folder
|
|
100
|
+
|
|
101
|
+
# Compress all PDFs in a folder, including already compressed files
|
|
102
|
+
pdfc compress /path/to/folder --no-skip
|
|
103
|
+
|
|
104
|
+
# Choose settings interactively
|
|
105
|
+
pdfc compress input.pdf -i
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### `compare` – Compare compression configurations side by side
|
|
109
|
+
|
|
110
|
+
Runs all presets defined in `~/.config/pdfc/presets.yaml` against one or more
|
|
111
|
+
PDF files and writes the results into a subdirectory named after each input file.
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
Preset fields (entries in `presets.yaml`):
|
|
115
|
+
|
|
116
|
+
| Field name | Type | Range | Default | Description |
|
|
117
|
+
| ----------------- | -------- | ----------------------- | --- | --------------------------------------------------------------------------------------------------------------------- |
|
|
118
|
+
| `name` | `string` | `[a-zA-Z0-9_-]+` | `-` | Required. Used as the output file name. |
|
|
119
|
+
| `mode` | `string` | `color`, `gray` or `bw` | `bw` | Color space of the output: Black & white (1-bit after threshold conversion), Grayscale (8-bit) or Color (RGB, 24-bit) |
|
|
120
|
+
| `dpi` | `int` | `> 0` | `300` | Resolution for rasterization. Strongly affects both quality and file size. |
|
|
121
|
+
| `threshold`       | `int`    | `0-255`                 | `150` | Threshold for B&W conversion. Pixels above the value → white, below → black. Higher value = more pixels turn black; lower value = more pixels turn white. |
|
|
122
|
+
| `jpeg_quality` | `int` | `1-100` | `30` | JPEG quality. Lower values = smaller file, lower image quality. Enables JPEG mode. Cannot be combined with `png_compression`. |
|
|
123
|
+
| `png_compression` | `int`    | `0-9`                   | `7` | PNG compression level. Higher values lead to smaller files and longer processing times. Enables PNG mode. Cannot be combined with `jpeg_quality`. |
|
|
124
|
+
| `sharpen` | `float` | `0.0-3.0` | `1.0` | Sharpening filter (PIL ImageEnhance.Sharpness). 0.0 → off, 1.0 → no change, >1.0 → sharper. |
|
|
125
|
+
| `contrast` | `float` | `0.0-3.0` | `1.0` | Contrast filter (PIL ImageEnhance.Contrast). 1.0 → no change, >1.0 → more contrast. |
|
|
126
|
+
| `unsharp_mask` | `bool` | `true`/`false` | `false` | PIL UnsharpMask filter (radius=2, percent=150, threshold=3). Sharpens edges before conversion. |
|
|
127
|
+
| `tiff_ccitt` | `bool` | `true`/`false` | `false` | Use TIFF CCITT Group 4 intermediate format |
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
pdfc compare [OPTIONS] INPUT_PATH
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
| Option | Short | Description |
|
|
135
|
+
|---|---|---|
|
|
136
|
+
| `--dpi` | `-d` | Resolution for rasterization (default: 300) |
|
|
137
|
+
| `--verbose` | `-v` | Verbose output (documented here, but not defined by the `compare` command in v0.1.0) |
|
|
138
|
+
|
|
139
|
+
**Examples:**
|
|
140
|
+
|
|
141
|
+
```zsh
|
|
142
|
+
# Compare all presets for a single file
|
|
143
|
+
pdfc compare input.pdf
|
|
144
|
+
|
|
145
|
+
# Compare all presets for all PDFs in a folder at 200 DPI
|
|
146
|
+
pdfc compare /path/to/folder --dpi 200
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Output is written to a subdirectory next to the input file:
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
input.pdf
|
|
153
|
+
input/
|
|
154
|
+
preset-name-1.pdf
|
|
155
|
+
preset-name-2.pdf
|
|
156
|
+
...
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Presets file
|
|
160
|
+
|
|
161
|
+
The `compare` command reads presets from `~/.config/pdfc/presets.yaml`.
|
|
162
|
+
Each preset defines a named compression configuration.
|
|
163
|
+
|
|
164
|
+
**Example:**
|
|
165
|
+
|
|
166
|
+
```yaml
|
|
167
|
+
presets:
|
|
168
|
+
- name: bw-300
|
|
169
|
+
mode: bw
|
|
170
|
+
dpi: 300
|
|
171
|
+
threshold: 150
|
|
172
|
+
sharpen: 1.5
|
|
173
|
+
contrast: 1.5
|
|
174
|
+
|
|
175
|
+
- name: gray-150
|
|
176
|
+
mode: gray
|
|
177
|
+
dpi: 150
|
|
178
|
+
jpeg_quality: 40
|
|
179
|
+
|
|
180
|
+
- name: color-200
|
|
181
|
+
mode: color
|
|
182
|
+
dpi: 200
|
|
183
|
+
jpeg_quality: 60
|
|
184
|
+
sharpen: 1.3
|
|
185
|
+
contrast: 1.3
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
See `example-presets.yaml` for more presets to try.
|
|
189
|
+
|
|
190
|
+
Available preset fields:
|
|
191
|
+
|
|
192
|
+
| Field | Type | Description |
|
|
193
|
+
|---|---|---|
|
|
194
|
+
| `name` | string | Required. Used as the output file name |
|
|
195
|
+
| `mode` | string | `color`, `gray` or `bw` |
|
|
196
|
+
| `dpi` | int | Resolution for rasterization |
|
|
197
|
+
| `threshold` | int | B&W threshold 0–255 |
|
|
198
|
+
| `jpeg_quality` | int | JPEG quality 1–100 |
|
|
199
|
+
| `png_compression` | int | PNG compression level 0–9 |
|
|
200
|
+
| `sharpen` | float | Sharpening factor 0.0–3.0 |
|
|
201
|
+
| `contrast` | float | Contrast factor 0.0–3.0 |
|
|
202
|
+
| `unsharp_mask` | bool | Apply PIL UnsharpMask filter |
|
|
203
|
+
| `tiff_ccitt` | bool | Use TIFF CCITT Group 4 intermediate format |
|
|
204
|
+
|
|
205
|
+
## Compression modes
|
|
206
|
+
|
|
207
|
+
| Mode | Description |
|
|
208
|
+
|---|---|
|
|
209
|
+
| `color` | Keeps full colour. Best for photos and colour diagrams |
|
|
210
|
+
| `gray` | Converts to greyscale. Good balance of size and readability |
|
|
211
|
+
| `bw` | Converts to black & white. Smallest file size, best for text-only scans |
|
|
212
|
+
|
|
213
|
+
## Parameter reference
|
|
214
|
+
|
|
215
|
+
### Sharpen (`-s` / `--sharpen`)
|
|
216
|
+
|
|
217
|
+
Range: 0.0 to 3.0 (float)
|
|
218
|
+
|
|
219
|
+
| Value | Effect | Use case |
|
|
220
|
+
|---|---|---|
|
|
221
|
+
| **0.0** | No sharpening (off) | Default |
|
|
222
|
+
| **0.5** | Slight blur | Noise reduction |
|
|
223
|
+
| **1.0** | Original (no change) | Baseline |
|
|
224
|
+
| **1.2–1.5** | Light sharpening | Recommended for clean documents |
|
|
225
|
+
| **1.5–2.0** | Medium sharpening | Good for text |
|
|
226
|
+
| **2.0–2.5** | Strong sharpening | For blurry scans |
|
|
227
|
+
| **2.5–3.0** | Very strong sharpening | Risk of artefacts |
|
|
228
|
+
| **>3.0** | Extreme (not allowed) | Too much; artefacts likely |
|
|
229
|
+
|
|
230
|
+
Visual effect:
|
|
231
|
+
|
|
232
|
+
```
|
|
233
|
+
sharpen = 0.5: T e x t (blurry)
|
|
234
|
+
sharpen = 1.0: Text (original)
|
|
235
|
+
sharpen = 1.5: Text (crisper)
|
|
236
|
+
sharpen = 2.0: Text (very sharp)
|
|
237
|
+
sharpen = 3.0: Text (over-sharpened, halo artefacts)
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
**Too much sharpening (> 3.0):**
|
|
241
|
+
- Halos around letters (white/black fringes)
|
|
242
|
+
- Noise amplification (grainy look)
|
|
243
|
+
- Edge artefacts
|
|
244
|
+
- Unnatural appearance
|
|
245
|
+
|
|
246
|
+
### Contrast (`-c` / `--contrast`)
|
|
247
|
+
|
|
248
|
+
Range: 0.0 to 3.0 (float)
|
|
249
|
+
|
|
250
|
+
| Value | Effect | Use case |
|
|
251
|
+
|---|---|---|
|
|
252
|
+
| **0.0** | Flat grey (no contrast) | Not useful |
|
|
253
|
+
| **0.5** | Reduced contrast | Softer images |
|
|
254
|
+
| **1.0** | Original (no change) | Baseline |
|
|
255
|
+
| **1.2–1.5** | Slightly increased contrast | Recommended for clean documents |
|
|
256
|
+
| **1.5–2.0** | Medium contrast | Good for text, clear separation |
|
|
257
|
+
| **2.0–2.5** | Strong contrast | For faded documents |
|
|
258
|
+
| **2.5–3.0** | Very strong contrast | Risk of detail loss |
|
|
259
|
+
| **>3.0** | Extreme (not allowed) | Near binary; details lost |
|
|
260
|
+
|
|
261
|
+
Visual effect:
|
|
262
|
+
|
|
263
|
+
```
|
|
264
|
+
contrast = 0.5: Text (washed out, grey)
|
|
265
|
+
contrast = 1.0: Text (original)
|
|
266
|
+
contrast = 1.5: Text (crisper, more defined)
|
|
267
|
+
contrast = 2.0: Text (very clear, strong separation)
|
|
268
|
+
contrast = 3.0: Text (near binary)
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
**Too much contrast (> 3.0):**
|
|
272
|
+
- Detail loss (everything becomes black or white)
|
|
273
|
+
- No more grey tones
|
|
274
|
+
- Hard edges without transitions
|
|
275
|
+
- Information loss
|
|
276
|
+
|
|
277
|
+
### Interaction between contrast and threshold
|
|
278
|
+
|
|
279
|
+
Contrast is applied before the B&W threshold. A higher contrast value effectively
|
|
280
|
+
makes the threshold more aggressive, since pixel values are pushed further apart
|
|
281
|
+
before the cut-off is applied.
|
|
282
|
+
|
|
283
|
+
| Scenario | contrast | threshold | Pixel at 140 | Pixel at 160 |
|
|
284
|
+
|---|---|---|---|---|
|
|
285
|
+
| Low contrast | 1.0 | 150 | stays ~140 → black | stays ~160 → white |
|
|
286
|
+
| High contrast | 2.0 | 150 | pushed to ~100 → black | pushed to ~200 → white |
|
|
287
|
+
|
|
288
|
+
**Rule of thumb:** increase contrast only as much as needed; combine with the
|
|
289
|
+
threshold to control where the black/white boundary falls.
|
|
290
|
+
|
|
291
|
+
### Recommended combinations
|
|
292
|
+
|
|
293
|
+
| Document type | sharpen | contrast | Notes |
|
|
294
|
+
|---|---|---|---|
|
|
295
|
+
| Clean, digital documents | 1.3 | 1.3 | Subtle improvement, no risk |
|
|
296
|
+
| Standard scans | 1.5 | 1.5 | Best balance |
|
|
297
|
+
| Blurry or faded scans | 2.0 | 2.0 | Strong improvement, low artefact risk |
|
|
298
|
+
| Very poor scans | 2.5 | 2.5 | Maximum recommended |
|
|
299
|
+
| Last resort | 3.0 | 3.0 | Artefacts likely |
|
|
300
|
+
|
|
301
|
+
### Recommended parameter ranges (summary)
|
|
302
|
+
|
|
303
|
+
| Parameter | Minimum | Recommended | Safe maximum | Risky maximum |
|
|
304
|
+
|---|---|---|---|---|
|
|
305
|
+
| Sharpen | 0.0 (off) | 1.3–2.0 | 2.5 | 3.0 |
|
|
306
|
+
| Contrast | 0.0 (grey) | 1.3–2.0 | 2.5 | 3.0 |
|
|
307
|
+
|
|
308
|
+
## Planned features
|
|
309
|
+
|
|
310
|
+
### v0.1 – Initial release
|
|
311
|
+
|
|
312
|
+
- [x] Skip files with the suffix `-compressed.pdf` to avoid re-processing already compressed files, unless flag `--no-skip` is set
|
|
313
|
+
- [ ] Skip presets for color mode if input file is B&W or grayscale
|
|
314
|
+
- [ ] Improve/clean output messages when processing files and presets
|
|
315
|
+
|
|
316
|
+
### v0.2 – Presets management
|
|
317
|
+
|
|
318
|
+
- [ ] `presets.yaml`: Field to set one preset as default for `compress` command, warn if missing or multiple defaults
|
|
319
|
+
- [ ] Add `--preset` option to `compress` command to specify a preset from the presets file, e.g. `pdfc compress input.pdf --preset bw-300`
|
|
320
|
+
- [ ] Add command `list-presets` which lets the user interactively view, add, edit and delete presets in the presets file
|
|
321
|
+
- [ ] Add command `preset` which lets the user select a preset from the presets file and applies it to one or more PDF files
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
pdfc/__init__.py
|
|
4
|
+
pdfc/__main__.py
|
|
5
|
+
pdfc/main.py
|
|
6
|
+
pypdfc.egg-info/PKG-INFO
|
|
7
|
+
pypdfc.egg-info/SOURCES.txt
|
|
8
|
+
pypdfc.egg-info/dependency_links.txt
|
|
9
|
+
pypdfc.egg-info/entry_points.txt
|
|
10
|
+
pypdfc.egg-info/requires.txt
|
|
11
|
+
pypdfc.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pdfc
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pypdfc"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A command line tool for compressing PDF files"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Corvin Gröning" }
|
|
9
|
+
]
|
|
10
|
+
keywords = ["pdf", "compress", "cli"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Programming Language :: Python :: 3",
|
|
13
|
+
"Environment :: Console",
|
|
14
|
+
]
|
|
15
|
+
dependencies = [
|
|
16
|
+
"img2pdf>=0.5.0",
|
|
17
|
+
"pdf2image>=1.17.0",
|
|
18
|
+
"Pillow>=11.0.0",
|
|
19
|
+
"PyYAML>=6.0.0",
|
|
20
|
+
"questionary>=2.0.0",
|
|
21
|
+
"rich>=14.0.0",
|
|
22
|
+
"typer>=0.15.0",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.optional-dependencies]
|
|
26
|
+
dev = [
|
|
27
|
+
"pytest>=9.0",
|
|
28
|
+
"pytest-cov>=7.0",
|
|
29
|
+
"coverage>=7.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
# Discover the top-level package together with all of its subpackages.
# pdfc/main.py imports from pdfc.cli, pdfc.domain, pdfc.services and
# pdfc.storage; an explicit `packages = ["pdfc"]` list ships only the
# top-level package, so those imports fail in an installed distribution.
# The test suite (see `testpaths` below) stays out of the distribution.
[tool.setuptools.packages.find]
include = ["pdfc*"]
exclude = ["pdfc.tests*"]
|
|
34
|
+
|
|
35
|
+
[project.scripts]
|
|
36
|
+
pdfc = "pdfc.main:main"
|
|
37
|
+
|
|
38
|
+
[tool.pytest.ini_options]
|
|
39
|
+
testpaths = ["pdfc/tests"]
|
|
40
|
+
|
|
41
|
+
[tool.mypy]
|
|
42
|
+
check_untyped_defs = true
|
|
43
|
+
|
|
44
|
+
[tool.pyright]
|
|
45
|
+
exclude = [
|
|
46
|
+
"**/.*",
|
|
47
|
+
"**/__pycache__",
|
|
48
|
+
"**/*.pyc",
|
|
49
|
+
".venv",
|
|
50
|
+
"venv",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[tool.ruff.lint]
|
|
54
|
+
ignore = ["E702"]
|
pypdfc-0.1.0/setup.cfg
ADDED