pompelmi 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,242 @@
1
+ # Error Handling
2
+
3
+ pompelmi has two distinct failure modes that require different handling: **rejected Promises** (the function threw) and **`Verdict.ScanError`** (the scan completed but could not determine safety). Understanding the difference is critical to building a secure upload pipeline.
4
+
5
+ ---
6
+
7
+ ## The two failure modes
8
+
9
+ ### 1. Rejected Promise (the scan function threw)
10
+
11
+ `scan()`, `scanBuffer()`, `scanStream()`, and `scanDirectory()` reject when something prevents the scan from running at all — not when the scan completes and finds a problem.
12
+
13
+ Common rejection causes:
14
+
15
+ | Error message | Cause |
16
+ |---------------|-------|
17
+ | `filePath must be a string` | Wrong argument type |
18
+ | `File not found: <path>` | File does not exist |
19
+ | `ENOENT` | `clamscan` not installed or not in PATH |
20
+ | `Unexpected exit code: N` | ClamAV internal error |
21
+ | `Process killed by signal: SIGTERM` | Process killed (OOM, timeout) |
22
+ | `clamd connection timed out after Nms` | TCP timeout exceeded |
23
+ | `buffer must be a Buffer` | Wrong argument to `scanBuffer()` |
24
+ | `stream must be a Readable` | Wrong argument to `scanStream()` |
25
+ | `dirPath must be a string` | Wrong argument to `scanDirectory()` |
26
+ | `Directory not found: <path>` | Directory does not exist |
27
+
28
+ These are programming errors or infrastructure failures. Handle them with `try/catch`.
29
+
30
+ ### 2. `Verdict.ScanError` (scan completed, result unknown)
31
+
32
+ When the result is `Verdict.ScanError`, the scan Promise resolves normally (it does not reject). The verdict indicates that ClamAV ran but could not produce a clean/malicious result. Common causes: encrypted archives, corrupt files, permission errors, I/O issues.
33
+
34
+ ---
35
+
36
+ ## The secure default: reject on both
37
+
38
+ The safest policy: any outcome other than `Verdict.Clean` results in rejection.
39
+
40
+ ```js
41
+ const { scan, Verdict } = require('pompelmi');
42
+ const fs = require('fs');
43
+
44
+ async function scanAndAccept(filePath) {
45
+ try {
46
+ const result = await scan(filePath, { host: 'clamav', port: 3310 });
47
+
48
+ if (result === Verdict.Malicious) {
49
+ fs.unlinkSync(filePath);
50
+ throw new Error('Malicious file rejected.');
51
+ }
52
+
53
+ if (result === Verdict.ScanError) {
54
+ fs.unlinkSync(filePath);
55
+ throw new Error('Scan incomplete — file rejected as precaution.');
56
+ }
57
+
58
+ return filePath; // Verdict.Clean
59
+ } catch (err) {
60
+ // Covers both scan() rejections and our own thrown Errors above.
61
+ // Delete the file defensively if it still exists.
62
+ try { fs.unlinkSync(filePath); } catch {}
63
+ throw err;
64
+ }
65
+ }
66
+ ```
67
+
68
+ ---
69
+
70
+ ## When to retry `ScanError`
71
+
72
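+ A `ScanError` caused by a transient network blip or a momentary clamd overload is worth one retry. A corrupt file or encrypted archive, by contrast, will produce `ScanError` on every attempt — retrying only wastes time. The helper below retries once by default, both when the scan resolves with `ScanError` and when `scan()` rejects.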
73
+
74
+ ```js
75
+ async function scanWithRetry(filePath, opts, retries = 1) {
76
+ for (let attempt = 0; attempt <= retries; attempt++) {
77
+ try {
78
+ const result = await scan(filePath, opts);
79
+ if (result !== Verdict.ScanError || attempt === retries) {
80
+ return result;
81
+ }
82
+ // ScanError on non-final attempt — wait briefly and retry
83
+ await new Promise(r => setTimeout(r, 500));
84
+ } catch (err) {
85
+ if (attempt === retries) throw err;
86
+ }
87
+ }
88
+ }
89
+ ```
90
+
91
+ Do not retry `Verdict.Malicious` — the signature match is deterministic.
92
+
93
+ ---
94
+
95
+ ## Cleanup with `finally`
96
+
97
+ Ensure temp files are always deleted regardless of scan outcome:
98
+
99
+ ```js
100
+ const os = require('os');
101
+ const fs = require('fs');
102
+ const path = require('path');
103
+ const { scan, Verdict } = require('pompelmi');
104
+
105
+ async function scanBuffer_manual(buffer) {
106
+ const tmpPath = path.join(os.tmpdir(), `scan-${Date.now()}.tmp`);
107
+ fs.writeFileSync(tmpPath, buffer);
108
+
109
+ try {
110
+ return await scan(tmpPath);
111
+ } finally {
112
+ try { fs.unlinkSync(tmpPath); } catch {}
113
+ }
114
+ }
115
+ ```
116
+
117
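+ `scanBuffer()` already applies this `finally` cleanup internally in local mode, so you don't need to replicate it when you call `scanBuffer()` directly. If you do write temp files yourself, note that the `Date.now()` name above is for brevity only: two calls in the same millisecond would collide, so use a unique name (for example one built from `crypto.randomUUID()`) in production.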
118
+
119
+ ---
120
+
121
+ ## Express error handling pattern
122
+
123
+ ```js
124
+ const express = require('express');
125
+ const multer = require('multer');
126
+ const fs = require('fs');
127
+ const { scan, Verdict } = require('pompelmi');
128
+
129
+ const app = express();
130
+ const upload = multer({ dest: './uploads', limits: { fileSize: 10 * 1024 * 1024 } });
131
+
132
+ app.post('/upload', upload.single('file'), async (req, res, next) => {
133
+ if (!req.file) return res.status(400).json({ error: 'No file uploaded.' });
134
+
135
+ try {
136
+ const result = await scan(req.file.path, { host: 'clamav', port: 3310 });
137
+
138
+ if (result === Verdict.Malicious) {
139
+ fs.unlinkSync(req.file.path);
140
+ return res.status(422).json({ error: 'Malicious file rejected.' });
141
+ }
142
+
143
+ if (result === Verdict.ScanError) {
144
+ fs.unlinkSync(req.file.path);
145
+ return res.status(422).json({ error: 'Scan failed — file rejected.' });
146
+ }
147
+
148
+ return res.json({ ok: true, filename: req.file.filename });
149
+ } catch (err) {
150
+ try { fs.unlinkSync(req.file.path); } catch {}
151
+ next(err); // forward to Express error middleware
152
+ }
153
+ });
154
+
155
+ // Global error handler
156
+ app.use((err, req, res, next) => {
157
+ console.error(err);
158
+ res.status(500).json({ error: 'Internal scan error.' });
159
+ });
160
+ ```
161
+
162
+ ---
163
+
164
+ ## Logging best practices
165
+
166
+ Log rejections with enough context to investigate later — but never log file contents.
167
+
168
+ ```js
169
+ const logger = require('./logger'); // pino, winston, etc.
170
+
171
+ if (result === Verdict.Malicious) {
172
+ logger.warn({
173
+ event: 'malware_detected',
174
+ filePath,
175
+ originalname: req.file.originalname,
176
+ mimetype: req.file.mimetype,
177
+ size: req.file.size,
178
+ userId: req.user?.id,
179
+ ip: req.ip,
180
+ });
181
+ }
182
+ ```
183
+
184
+ For `ScanError`:
185
+
186
+ ```js
187
+ if (result === Verdict.ScanError) {
188
+ logger.warn({
189
+ event: 'scan_error',
190
+ filePath,
191
+ mimetype: req.file.mimetype,
192
+ size: req.file.size,
193
+ });
194
+ }
195
+ ```
196
+
197
+ For scan function rejections:
198
+
199
+ ```js
200
+ } catch (err) {
201
+ logger.error({
202
+ event: 'scan_threw',
203
+ message: err.message,
204
+ filePath,
205
+ });
206
+ }
207
+ ```
208
+
209
+ ---
210
+
211
+ ## HTTP status code conventions
212
+
213
+ | Situation | Recommended status |
214
+ |-----------|-------------------|
215
+ | No file in request | `400 Bad Request` |
216
+ | Wrong argument (programming error) | `400 Bad Request` |
217
+ | `Verdict.Malicious` | `422 Unprocessable Entity` |
218
+ | `Verdict.ScanError` (reject policy) | `422 Unprocessable Entity` |
219
+ | `scan()` throws (infra error) | `500 Internal Server Error` |
220
+ | File too large (pre-scan) | `413 Content Too Large` |
221
+ | `Verdict.Clean` | `200 OK` / `201 Created` |
222
+
223
+ ---
224
+
225
+ ## `scanDirectory()` error handling
226
+
227
+ Per-file failures in `scanDirectory()` are collected in the `errors` array of the result — the function itself rejects only on argument errors or a missing directory.
228
+
229
+ ```js
230
+ const { scanDirectory } = require('pompelmi');
231
+
232
+ try {
233
+ const results = await scanDirectory('/uploads');
234
+ if (results.errors.length > 0) {
235
+ logger.warn({ event: 'scan_errors', paths: results.errors });
236
+ // Decide: reject the whole batch, or only reject the errored files
237
+ }
238
+ } catch (err) {
239
+ // dirPath not a string, or directory not found
240
+ logger.error({ event: 'scan_threw', message: err.message });
241
+ }
242
+ ```
@@ -0,0 +1,227 @@
1
+ # Express Integration
2
+
3
+ This page covers integrating pompelmi into an Express application for file upload scanning — both disk storage (scan by path) and memory storage (scan by buffer).
4
+
5
+ ---
6
+
7
+ ## Setup
8
+
9
+ ```bash
10
+ npm install pompelmi multer express
11
+ ```
12
+
13
+ ---
14
+
15
+ ## Disk storage (scan by file path)
16
+
17
+ multer writes the uploaded file to disk before your route handler runs. Call `scan(req.file.path)` to scan it.
18
+
19
+ ```js
20
+ const express = require('express');
21
+ const multer = require('multer');
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const { scan, Verdict } = require('pompelmi');
25
+
26
+ const upload = multer({ dest: path.join(__dirname, 'uploads') });
27
+ const app = express();
28
+
29
+ app.post('/upload', upload.single('file'), async (req, res) => {
30
+ if (!req.file) {
31
+ return res.status(400).json({ error: 'No file uploaded.' });
32
+ }
33
+
34
+ const filePath = req.file.path;
35
+
36
+ try {
37
+ const result = await scan(filePath);
38
+
39
+ if (result === Verdict.Malicious) {
40
+ fs.unlinkSync(filePath);
41
+ return res.status(422).json({ error: 'Malicious file rejected.' });
42
+ }
43
+
44
+ if (result === Verdict.ScanError) {
45
+ fs.unlinkSync(filePath);
46
+ return res.status(422).json({ error: 'Scan incomplete — file rejected as precaution.' });
47
+ }
48
+
49
+ // Verdict.Clean — rename to final destination or store as-is
50
+ return res.json({ ok: true, filename: req.file.filename });
51
+ } catch (err) {
52
+ // clamscan not in PATH, file not found, process killed, etc.
53
+ try { fs.unlinkSync(filePath); } catch {}
54
+ return res.status(500).json({ error: `Scan failed: ${err.message}` });
55
+ }
56
+ });
57
+
58
+ app.listen(3000);
59
+ ```
60
+
61
+ ### With TCP mode (Docker sidecar)
62
+
63
+ ```js
64
+ const SCAN_OPTS = {
65
+ host: process.env.CLAMAV_HOST || '127.0.0.1',
66
+ port: Number(process.env.CLAMAV_PORT) || 3310,
67
+ timeout: 30_000,
68
+ };
69
+
70
+ const result = await scan(filePath, SCAN_OPTS);
71
+ ```
72
+
73
+ ---
74
+
75
+ ## Memory storage (scan by buffer)
76
+
77
+ When you use `multer({ storage: multer.memoryStorage() })`, the file is never written to disk — it lives in `req.file.buffer`. Use `scanBuffer()` instead of `scan()`.
78
+
79
+ ```js
80
+ const express = require('express');
81
+ const multer = require('multer');
82
+ const { scanBuffer, Verdict } = require('pompelmi');
83
+
84
+ const upload = multer({ storage: multer.memoryStorage() });
85
+ const app = express();
86
+
87
+ const SCAN_OPTS = {
88
+ host: process.env.CLAMAV_HOST,
89
+ port: 3310,
90
+ };
91
+
92
+ app.post('/upload', upload.single('file'), async (req, res) => {
93
+ if (!req.file) {
94
+ return res.status(400).json({ error: 'No file uploaded.' });
95
+ }
96
+
97
+ try {
98
+ const result = await scanBuffer(req.file.buffer, SCAN_OPTS);
99
+
100
+ if (result === Verdict.Malicious) {
101
+ return res.status(422).json({ error: 'Malicious file rejected.' });
102
+ }
103
+
104
+ if (result === Verdict.ScanError) {
105
+ return res.status(422).json({ error: 'Scan incomplete — file rejected.' });
106
+ }
107
+
108
+ // Clean — forward buffer to storage (S3, database, disk)
109
+ return res.json({ ok: true, originalname: req.file.originalname });
110
+ } catch (err) {
111
+ return res.status(500).json({ error: `Scan failed: ${err.message}` });
112
+ }
113
+ });
114
+
115
+ app.listen(3000);
116
+ ```
117
+
118
+ ---
119
+
120
+ ## Scanning multiple files in one request
121
+
122
+ ```js
123
+ app.post('/upload-many', upload.array('files', 10), async (req, res) => {
124
+ if (!req.files || req.files.length === 0) {
125
+ return res.status(400).json({ error: 'No files uploaded.' });
126
+ }
127
+
128
+ const results = await Promise.allSettled(
129
+ req.files.map(async (file) => {
130
+ const verdict = await scan(file.path, SCAN_OPTS);
131
+ return { file, verdict };
132
+ })
133
+ );
134
+
135
+ const rejected = [];
136
+ const accepted = [];
137
+
138
+ for (const r of results) {
139
+ if (r.status === 'rejected') {
140
+ rejected.push({ filename: '?', reason: r.reason.message });
141
+ continue;
142
+ }
143
+ const { file, verdict } = r.value;
144
+ if (verdict !== Verdict.Clean) {
145
+ try { fs.unlinkSync(file.path); } catch {}
146
+ rejected.push({ filename: file.originalname, reason: verdict.description });
147
+ } else {
148
+ accepted.push(file.originalname);
149
+ }
150
+ }
151
+
152
+ if (rejected.length > 0) {
153
+ return res.status(422).json({ accepted, rejected });
154
+ }
155
+ return res.json({ ok: true, accepted });
156
+ });
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Centralised error handling middleware
162
+
163
+ Extract scan logic into middleware for reuse across routes:
164
+
165
+ ```js
166
+ async function scanUpload(req, res, next) {
167
+ if (!req.file) return next();
168
+
169
+ const filePath = req.file.path;
170
+
171
+ try {
172
+ const result = await scan(filePath, SCAN_OPTS);
173
+
174
+ if (result !== Verdict.Clean) {
175
+ try { fs.unlinkSync(filePath); } catch {}
176
+ const status = result === Verdict.Malicious ? 422 : 422;
177
+ return res.status(status).json({ error: `Upload rejected: ${result.description}` });
178
+ }
179
+
180
+ next();
181
+ } catch (err) {
182
+ try { fs.unlinkSync(filePath); } catch {}
183
+ next(err);
184
+ }
185
+ }
186
+
187
+ // Use it
188
+ app.post('/profile-photo', upload.single('photo'), scanUpload, (req, res) => {
189
+ res.json({ ok: true, path: req.file.path });
190
+ });
191
+ ```
192
+
193
+ ---
194
+
195
+ ## HTTP status codes
196
+
197
+ | Situation | Status |
198
+ |-----------|--------|
199
+ | No file in request | `400 Bad Request` |
200
+ | `Verdict.Malicious` | `422 Unprocessable Entity` |
201
+ | `Verdict.ScanError` | `422 Unprocessable Entity` |
202
+ | `scan()` throws | `500 Internal Server Error` |
203
+ | `Verdict.Clean` | `200 OK` (or `201 Created` after storing) |
204
+
205
+ ---
206
+
207
+ ## File size limits
208
+
209
+ Always set a file size limit on multer to prevent large uploads from exhausting memory or disk:
210
+
211
+ ```js
212
+ const upload = multer({
213
+ dest: './uploads',
214
+ limits: { fileSize: 10 * 1024 * 1024 }, // 10 MB
215
+ });
216
+ ```
217
+
218
+ When the limit is exceeded, multer passes a `MulterError` (a subclass of `Error`) with the code `LIMIT_FILE_SIZE` to Express's error handling. Handle it in your error middleware:
219
+
220
+ ```js
221
+ app.use((err, req, res, next) => {
222
+ if (err.code === 'LIMIT_FILE_SIZE') {
223
+ return res.status(413).json({ error: 'File too large.' });
224
+ }
225
+ next(err);
226
+ });
227
+ ```
@@ -0,0 +1,207 @@
1
+ # Fastify Integration
2
+
3
+ Complete guide to integrating pompelmi into a Fastify application. Covers disk-based scanning, stream-based scanning with `scanStream()`, and error handling.
4
+
5
+ ---
6
+
7
+ ## Setup
8
+
9
+ ```bash
10
+ npm install pompelmi fastify @fastify/multipart
11
+ ```
12
+
13
+ ---
14
+
15
+ ## Disk-based scanning (save to disk first, then scan)
16
+
17
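+ This pattern saves the uploaded file to disk via `pipeline`, then scans it by path. It is the most straightforward approach for large files. Note that `data.filename` is supplied by the client: in production, sanitise it (for example with `path.basename()`) or generate your own file name before joining it into a path, so that a crafted name cannot escape the upload directory.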
18
+
19
+ ```js
20
+ const Fastify = require('fastify');
21
+ const { pipeline } = require('stream/promises');
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const { scan, Verdict } = require('pompelmi');
25
+
26
+ const app = Fastify({ logger: true });
27
+ app.register(require('@fastify/multipart'));
28
+
29
+ const SCAN_OPTS = {
30
+ host: process.env.CLAMAV_HOST,
31
+ port: Number(process.env.CLAMAV_PORT) || 3310,
32
+ timeout: 30_000,
33
+ };
34
+
35
+ app.post('/upload', async (req, reply) => {
36
+ const data = await req.file();
37
+ const filePath = path.join('./uploads', `${Date.now()}-${data.filename}`);
38
+
39
+ // Write to disk
40
+ await pipeline(data.file, fs.createWriteStream(filePath));
41
+
42
+ let result;
43
+ try {
44
+ result = await scan(filePath, SCAN_OPTS);
45
+ } catch (err) {
46
+ try { fs.unlinkSync(filePath); } catch {}
47
+ return reply.code(500).send({ error: `Scan failed: ${err.message}` });
48
+ }
49
+
50
+ if (result !== Verdict.Clean) {
51
+ try { fs.unlinkSync(filePath); } catch {}
52
+ return reply.code(422).send({ error: `Upload rejected: ${result.description}` });
53
+ }
54
+
55
+ return reply.send({ ok: true, filename: path.basename(filePath) });
56
+ });
57
+
58
+ app.listen({ port: 3000 });
59
+ ```
60
+
61
+ ---
62
+
63
+ ## Stream-based scanning (no disk I/O in TCP mode)
64
+
65
+ When using TCP mode (clamd sidecar), pass the upload stream directly to `scanStream()`. The file never touches the application host's disk.
66
+
67
+ ```js
68
+ const Fastify = require('fastify');
69
+ const { scanStream, Verdict } = require('pompelmi');
70
+
71
+ const app = Fastify({ logger: true });
72
+ app.register(require('@fastify/multipart'));
73
+
74
+ const SCAN_OPTS = {
75
+ host: process.env.CLAMAV_HOST || 'clamav',
76
+ port: 3310,
77
+ timeout: 30_000,
78
+ };
79
+
80
+ app.post('/upload', async (req, reply) => {
81
+ const data = await req.file();
82
+
83
+ let result;
84
+ try {
85
+ result = await scanStream(data.file, SCAN_OPTS);
86
+ } catch (err) {
87
+ return reply.code(500).send({ error: `Scan failed: ${err.message}` });
88
+ }
89
+
90
+ if (result === Verdict.Malicious) {
91
+ return reply.code(422).send({ error: 'Malicious file rejected.' });
92
+ }
93
+
94
+ if (result === Verdict.ScanError) {
95
+ return reply.code(422).send({ error: 'Scan incomplete — file rejected.' });
96
+ }
97
+
98
+ // Stream is consumed — if you need to store the file you must re-read it.
99
+ // With stream scanning, save to storage (S3, disk) in a separate step
100
+ // using the original source, not data.file (already consumed).
101
+ return reply.send({ ok: true, filename: data.filename });
102
+ });
103
+
104
+ app.listen({ port: 3000 });
105
+ ```
106
+
107
+ > **Note on stream consumption:** Once `scanStream()` consumes `data.file`, the stream is exhausted. If you need to store the file after scanning, either save it to disk first (disk-based pattern) or scan and upload to S3 in parallel using a passthrough stream.
108
+
109
+ ---
110
+
111
+ ## Scanning multiple files
112
+
113
+ ```js
114
+ app.post('/upload-many', async (req, reply) => {
115
+ const parts = req.files();
116
+ const results = [];
117
+
118
+ for await (const part of parts) {
119
+ const filePath = path.join('./uploads', `${Date.now()}-${part.filename}`);
120
+ await pipeline(part.file, fs.createWriteStream(filePath));
121
+
122
+ const verdict = await scan(filePath, SCAN_OPTS).catch(err => {
123
+ try { fs.unlinkSync(filePath); } catch {}
124
+ return null;
125
+ });
126
+
127
+ if (!verdict || verdict !== Verdict.Clean) {
128
+ try { fs.unlinkSync(filePath); } catch {}
129
+ results.push({ filename: part.filename, accepted: false, reason: verdict?.description ?? 'scan_error' });
130
+ } else {
131
+ results.push({ filename: part.filename, accepted: true });
132
+ }
133
+ }
134
+
135
+ const anyRejected = results.some(r => !r.accepted);
136
+ return reply.code(anyRejected ? 422 : 200).send({ results });
137
+ });
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Error handling with `setErrorHandler`
143
+
144
+ Register a global error handler for unexpected failures:
145
+
146
+ ```js
147
+ app.setErrorHandler((err, req, reply) => {
148
+ req.log.error(err);
149
+ reply.code(500).send({ error: 'Internal server error.' });
150
+ });
151
+ ```
152
+
153
+ For multer-equivalent size limits with `@fastify/multipart`:
154
+
155
+ ```js
156
+ app.register(require('@fastify/multipart'), {
157
+ limits: {
158
+ fileSize: 10 * 1024 * 1024, // 10 MB
159
+ },
160
+ });
161
+
162
+ app.setErrorHandler((err, req, reply) => {
163
+ if (err.code === 'FST_REQ_FILE_TOO_LARGE') {
164
+ return reply.code(413).send({ error: 'File too large.' });
165
+ }
166
+ reply.code(500).send({ error: err.message });
167
+ });
168
+ ```
169
+
170
+ ---
171
+
172
+ ## HTTP status codes
173
+
174
+ | Situation | Status |
175
+ |-----------|--------|
176
+ | No file part in request | `400 Bad Request` |
177
+ | `Verdict.Malicious` | `422 Unprocessable Entity` |
178
+ | `Verdict.ScanError` | `422 Unprocessable Entity` |
179
+ | `scan()` / `scanStream()` throws | `500 Internal Server Error` |
180
+ | File too large | `413 Content Too Large` |
181
+ | `Verdict.Clean` | `200 OK` |
182
+
183
+ ---
184
+
185
+ ## TypeScript
186
+
187
+ ```ts
188
+ import Fastify from 'fastify';
189
+ import multipart from '@fastify/multipart';
190
+ import { scanStream, Verdict } from 'pompelmi';
191
+
192
+ const app = Fastify();
193
+ app.register(multipart);
194
+
195
+ app.post('/upload', async (req, reply) => {
196
+ const data = await req.file();
197
+ if (!data) return reply.code(400).send({ error: 'No file.' });
198
+
199
+ const result = await scanStream(data.file, { host: 'clamav', port: 3310 });
200
+
201
+ if (result !== Verdict.Clean) {
202
+ return reply.code(422).send({ error: result.description });
203
+ }
204
+
205
+ return reply.send({ ok: true });
206
+ });
207
+ ```
package/wiki/home.md ADDED
File without changes