pompelmi 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
1
+ # Express Integration
2
+
3
+ This page covers integrating pompelmi into an Express application for file upload scanning — both disk storage (scan by path) and memory storage (scan by buffer).
4
+
5
+ ---
6
+
7
+ ## Setup
8
+
9
+ ```bash
10
+ npm install pompelmi multer express
11
+ ```
12
+
13
+ ---
14
+
15
+ ## Disk storage (scan by file path)
16
+
17
+ multer writes the uploaded file to disk before your route handler runs. Call `scan(req.file.path)` to scan it.
18
+
19
+ ```js
20
+ const express = require('express');
21
+ const multer = require('multer');
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const { scan, Verdict } = require('pompelmi');
25
+
26
+ const upload = multer({ dest: path.join(__dirname, 'uploads') });
27
+ const app = express();
28
+
29
+ app.post('/upload', upload.single('file'), async (req, res) => {
30
+ if (!req.file) {
31
+ return res.status(400).json({ error: 'No file uploaded.' });
32
+ }
33
+
34
+ const filePath = req.file.path;
35
+
36
+ try {
37
+ const result = await scan(filePath);
38
+
39
+ if (result === Verdict.Malicious) {
40
+ fs.unlinkSync(filePath);
41
+ return res.status(422).json({ error: 'Malicious file rejected.' });
42
+ }
43
+
44
+ if (result === Verdict.ScanError) {
45
+ fs.unlinkSync(filePath);
46
+ return res.status(422).json({ error: 'Scan incomplete — file rejected as precaution.' });
47
+ }
48
+
49
+ // Verdict.Clean — rename to final destination or store as-is
50
+ return res.json({ ok: true, filename: req.file.filename });
51
+ } catch (err) {
52
+ // clamscan not in PATH, file not found, process killed, etc.
53
+ try { fs.unlinkSync(filePath); } catch {}
54
+ return res.status(500).json({ error: `Scan failed: ${err.message}` });
55
+ }
56
+ });
57
+
58
+ app.listen(3000);
59
+ ```
60
+
61
+ ### With TCP mode (Docker sidecar)
62
+
63
+ ```js
64
+ const SCAN_OPTS = {
65
+ host: process.env.CLAMAV_HOST || '127.0.0.1',
66
+ port: Number(process.env.CLAMAV_PORT) || 3310,
67
+ timeout: 30_000,
68
+ };
69
+
70
+ const result = await scan(filePath, SCAN_OPTS);
71
+ ```
72
+
73
+ ---
74
+
75
+ ## Memory storage (scan by buffer)
76
+
77
+ When you use `multer({ storage: multer.memoryStorage() })`, the file is never written to disk — it lives in `req.file.buffer`. Use `scanBuffer()` instead of `scan()`.
78
+
79
+ ```js
80
+ const express = require('express');
81
+ const multer = require('multer');
82
+ const { scanBuffer, Verdict } = require('pompelmi');
83
+
84
+ const upload = multer({ storage: multer.memoryStorage() });
85
+ const app = express();
86
+
87
+ const SCAN_OPTS = {
88
+ host: process.env.CLAMAV_HOST,
89
+ port: 3310,
90
+ };
91
+
92
+ app.post('/upload', upload.single('file'), async (req, res) => {
93
+ if (!req.file) {
94
+ return res.status(400).json({ error: 'No file uploaded.' });
95
+ }
96
+
97
+ try {
98
+ const result = await scanBuffer(req.file.buffer, SCAN_OPTS);
99
+
100
+ if (result === Verdict.Malicious) {
101
+ return res.status(422).json({ error: 'Malicious file rejected.' });
102
+ }
103
+
104
+ if (result === Verdict.ScanError) {
105
+ return res.status(422).json({ error: 'Scan incomplete — file rejected.' });
106
+ }
107
+
108
+ // Clean — forward buffer to storage (S3, database, disk)
109
+ return res.json({ ok: true, originalname: req.file.originalname });
110
+ } catch (err) {
111
+ return res.status(500).json({ error: `Scan failed: ${err.message}` });
112
+ }
113
+ });
114
+
115
+ app.listen(3000);
116
+ ```
117
+
118
+ ---
119
+
120
+ ## Scanning multiple files in one request
121
+
122
+ ```js
123
+ app.post('/upload-many', upload.array('files', 10), async (req, res) => {
124
+ if (!req.files || req.files.length === 0) {
125
+ return res.status(400).json({ error: 'No files uploaded.' });
126
+ }
127
+
128
+ const results = await Promise.allSettled(
129
+ req.files.map(async (file) => {
130
+ const verdict = await scan(file.path, SCAN_OPTS);
131
+ return { file, verdict };
132
+ })
133
+ );
134
+
135
+ const rejected = [];
136
+ const accepted = [];
137
+
138
+ for (const r of results) {
139
+ if (r.status === 'rejected') {
140
+ rejected.push({ filename: '?', reason: r.reason.message });
141
+ continue;
142
+ }
143
+ const { file, verdict } = r.value;
144
+ if (verdict !== Verdict.Clean) {
145
+ try { fs.unlinkSync(file.path); } catch {}
146
+ rejected.push({ filename: file.originalname, reason: verdict.description });
147
+ } else {
148
+ accepted.push(file.originalname);
149
+ }
150
+ }
151
+
152
+ if (rejected.length > 0) {
153
+ return res.status(422).json({ accepted, rejected });
154
+ }
155
+ return res.json({ ok: true, accepted });
156
+ });
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Centralised error handling middleware
162
+
163
+ Extract scan logic into middleware for reuse across routes:
164
+
165
+ ```js
166
+ async function scanUpload(req, res, next) {
167
+ if (!req.file) return next();
168
+
169
+ const filePath = req.file.path;
170
+
171
+ try {
172
+ const result = await scan(filePath, SCAN_OPTS);
173
+
174
+ if (result !== Verdict.Clean) {
175
+ try { fs.unlinkSync(filePath); } catch {}
176
+ const status = result === Verdict.Malicious ? 422 : 422;
177
+ return res.status(status).json({ error: `Upload rejected: ${result.description}` });
178
+ }
179
+
180
+ next();
181
+ } catch (err) {
182
+ try { fs.unlinkSync(filePath); } catch {}
183
+ next(err);
184
+ }
185
+ }
186
+
187
+ // Use it
188
+ app.post('/profile-photo', upload.single('photo'), scanUpload, (req, res) => {
189
+ res.json({ ok: true, path: req.file.path });
190
+ });
191
+ ```
192
+
193
+ ---
194
+
195
+ ## HTTP status codes
196
+
197
+ | Situation | Status |
198
+ |-----------|--------|
199
+ | No file in request | `400 Bad Request` |
200
+ | `Verdict.Malicious` | `422 Unprocessable Entity` |
201
+ | `Verdict.ScanError` | `422 Unprocessable Entity` |
202
+ | `scan()` throws | `500 Internal Server Error` |
203
+ | `Verdict.Clean` | `200 OK` (or `201 Created` after storing) |
204
+
205
+ ---
206
+
207
+ ## File size limits
208
+
209
+ Always set a file size limit on multer to prevent large uploads from exhausting memory or disk:
210
+
211
+ ```js
212
+ const upload = multer({
213
+ dest: './uploads',
214
+ limits: { fileSize: 10 * 1024 * 1024 }, // 10 MB
215
+ });
216
+ ```
217
+
218
+ multer passes a `MulterError` (subclass of `Error`) to `next()` when the limit is exceeded. Handle it in your error middleware:
219
+
220
+ ```js
221
+ app.use((err, req, res, next) => {
222
+ if (err.code === 'LIMIT_FILE_SIZE') {
223
+ return res.status(413).json({ error: 'File too large.' });
224
+ }
225
+ next(err);
226
+ });
227
+ ```
@@ -0,0 +1,207 @@
1
+ # Fastify Integration
2
+
3
+ Complete guide to integrating pompelmi into a Fastify application. Covers disk-based scanning, stream-based scanning with `scanStream()`, and error handling.
4
+
5
+ ---
6
+
7
+ ## Setup
8
+
9
+ ```bash
10
+ npm install pompelmi fastify @fastify/multipart
11
+ ```
12
+
13
+ ---
14
+
15
+ ## Disk-based scanning (save to disk first, then scan)
16
+
17
+ This pattern saves the uploaded file to disk via `pipeline`, then scans it by path. It is the most straightforward approach for large files.
18
+
19
+ ```js
20
+ const Fastify = require('fastify');
21
+ const { pipeline } = require('stream/promises');
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const { scan, Verdict } = require('pompelmi');
25
+
26
+ const app = Fastify({ logger: true });
27
+ app.register(require('@fastify/multipart'));
28
+
29
+ const SCAN_OPTS = {
30
+ host: process.env.CLAMAV_HOST,
31
+ port: Number(process.env.CLAMAV_PORT) || 3310,
32
+ timeout: 30_000,
33
+ };
34
+
35
+ app.post('/upload', async (req, reply) => {
36
+ const data = await req.file();
37
+ const filePath = path.join('./uploads', `${Date.now()}-${data.filename}`);
38
+
39
+ // Write to disk
40
+ await pipeline(data.file, fs.createWriteStream(filePath));
41
+
42
+ let result;
43
+ try {
44
+ result = await scan(filePath, SCAN_OPTS);
45
+ } catch (err) {
46
+ try { fs.unlinkSync(filePath); } catch {}
47
+ return reply.code(500).send({ error: `Scan failed: ${err.message}` });
48
+ }
49
+
50
+ if (result !== Verdict.Clean) {
51
+ try { fs.unlinkSync(filePath); } catch {}
52
+ return reply.code(422).send({ error: `Upload rejected: ${result.description}` });
53
+ }
54
+
55
+ return reply.send({ ok: true, filename: path.basename(filePath) });
56
+ });
57
+
58
+ app.listen({ port: 3000 });
59
+ ```
60
+
61
+ ---
62
+
63
+ ## Stream-based scanning (no disk I/O in TCP mode)
64
+
65
+ When using TCP mode (clamd sidecar), pass the upload stream directly to `scanStream()`. The file never touches the application host's disk.
66
+
67
+ ```js
68
+ const Fastify = require('fastify');
69
+ const { scanStream, Verdict } = require('pompelmi');
70
+
71
+ const app = Fastify({ logger: true });
72
+ app.register(require('@fastify/multipart'));
73
+
74
+ const SCAN_OPTS = {
75
+ host: process.env.CLAMAV_HOST || 'clamav',
76
+ port: 3310,
77
+ timeout: 30_000,
78
+ };
79
+
80
+ app.post('/upload', async (req, reply) => {
81
+ const data = await req.file();
82
+
83
+ let result;
84
+ try {
85
+ result = await scanStream(data.file, SCAN_OPTS);
86
+ } catch (err) {
87
+ return reply.code(500).send({ error: `Scan failed: ${err.message}` });
88
+ }
89
+
90
+ if (result === Verdict.Malicious) {
91
+ return reply.code(422).send({ error: 'Malicious file rejected.' });
92
+ }
93
+
94
+ if (result === Verdict.ScanError) {
95
+ return reply.code(422).send({ error: 'Scan incomplete — file rejected.' });
96
+ }
97
+
98
+ // Stream is consumed — if you need to store the file you must re-read it.
99
+ // With stream scanning, save to storage (S3, disk) in a separate step
100
+ // using the original source, not data.file (already consumed).
101
+ return reply.send({ ok: true, filename: data.filename });
102
+ });
103
+
104
+ app.listen({ port: 3000 });
105
+ ```
106
+
107
+ > **Note on stream consumption:** Once `scanStream()` consumes `data.file`, the stream is exhausted. If you need to store the file after scanning, either save it to disk first (disk-based pattern) or scan and upload to S3 in parallel using a passthrough stream.
108
+
109
+ ---
110
+
111
+ ## Scanning multiple files
112
+
113
+ ```js
114
+ app.post('/upload-many', async (req, reply) => {
115
+ const parts = req.files();
116
+ const results = [];
117
+
118
+ for await (const part of parts) {
119
+ const filePath = path.join('./uploads', `${Date.now()}-${part.filename}`);
120
+ await pipeline(part.file, fs.createWriteStream(filePath));
121
+
122
+ const verdict = await scan(filePath, SCAN_OPTS).catch(err => {
123
+ try { fs.unlinkSync(filePath); } catch {}
124
+ return null;
125
+ });
126
+
127
+ if (!verdict || verdict !== Verdict.Clean) {
128
+ try { fs.unlinkSync(filePath); } catch {}
129
+ results.push({ filename: part.filename, accepted: false, reason: verdict?.description ?? 'scan_error' });
130
+ } else {
131
+ results.push({ filename: part.filename, accepted: true });
132
+ }
133
+ }
134
+
135
+ const anyRejected = results.some(r => !r.accepted);
136
+ return reply.code(anyRejected ? 422 : 200).send({ results });
137
+ });
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Error handling with `setErrorHandler`
143
+
144
+ Register a global error handler for unexpected failures:
145
+
146
+ ```js
147
+ app.setErrorHandler((err, req, reply) => {
148
+ req.log.error(err);
149
+ reply.code(500).send({ error: 'Internal server error.' });
150
+ });
151
+ ```
152
+
153
+ For multer-equivalent size limits with `@fastify/multipart`:
154
+
155
+ ```js
156
+ app.register(require('@fastify/multipart'), {
157
+ limits: {
158
+ fileSize: 10 * 1024 * 1024, // 10 MB
159
+ },
160
+ });
161
+
162
+ app.setErrorHandler((err, req, reply) => {
163
+ if (err.code === 'FST_REQ_FILE_TOO_LARGE') {
164
+ return reply.code(413).send({ error: 'File too large.' });
165
+ }
166
+ reply.code(500).send({ error: err.message });
167
+ });
168
+ ```
169
+
170
+ ---
171
+
172
+ ## HTTP status codes
173
+
174
+ | Situation | Status |
175
+ |-----------|--------|
176
+ | No file part in request | `400 Bad Request` |
177
+ | `Verdict.Malicious` | `422 Unprocessable Entity` |
178
+ | `Verdict.ScanError` | `422 Unprocessable Entity` |
179
+ | `scan()` / `scanStream()` throws | `500 Internal Server Error` |
180
+ | File too large | `413 Content Too Large` |
181
+ | `Verdict.Clean` | `200 OK` |
182
+
183
+ ---
184
+
185
+ ## TypeScript
186
+
187
+ ```ts
188
+ import Fastify from 'fastify';
189
+ import multipart from '@fastify/multipart';
190
+ import { scanStream, Verdict } from 'pompelmi';
191
+
192
+ const app = Fastify();
193
+ app.register(multipart);
194
+
195
+ app.post('/upload', async (req, reply) => {
196
+ const data = await req.file();
197
+ if (!data) return reply.code(400).send({ error: 'No file.' });
198
+
199
+ const result = await scanStream(data.file, { host: 'clamav', port: 3310 });
200
+
201
+ if (result !== Verdict.Clean) {
202
+ return reply.code(422).send({ error: result.description });
203
+ }
204
+
205
+ return reply.send({ ok: true });
206
+ });
207
+ ```
package/wiki/home.md ADDED
File without changes
@@ -0,0 +1,179 @@
1
+ # Local vs TCP Mode
2
+
3
+ pompelmi supports two scanning modes. Which one you use is controlled entirely by the options you pass — no configuration files, no environment flags in pompelmi itself.
4
+
5
+ ---
6
+
7
+ ## Summary
8
+
9
+ | | Local mode | TCP mode |
10
+ |---|---|---|
11
+ | **How it works** | Spawns `clamscan` as a child process | Streams file to `clamd` daemon over TCP |
12
+ | **ClamAV requirement** | `clamscan` binary in PATH | Running `clamd` daemon reachable over TCP |
13
+ | **Startup time** | Slow — loads virus DB each invocation | Fast — daemon keeps DB in memory |
14
+ | **Throughput** | Low — one process per scan | High — persistent connection |
15
+ | **Disk I/O** | Reads file from disk | Reads file from disk, or scans a buffer/stream with no disk I/O |
16
+ | **Docker** | Requires ClamAV in app container | Use ClamAV as a sidecar |
17
+ | **Zero-copy scan** | Not possible | `scanBuffer()` and `scanStream()` with no disk I/O |
18
+
19
+ ---
20
+
21
+ ## Enabling local mode
22
+
23
+ Do not pass `host` or `port`. pompelmi spawns `clamscan --no-summary <filePath>`:
24
+
25
+ ```js
26
+ const { scan, Verdict } = require('pompelmi');
27
+
28
+ // Local mode — no options, or empty options
29
+ const result = await scan('/uploads/file.pdf');
30
+ const result = await scan('/uploads/file.pdf', {});
31
+ ```
32
+
33
+ `clamscan` must be in `PATH`. Install it with:
34
+
35
+ ```bash
36
+ # macOS
37
+ brew install clamav && freshclam
38
+
39
+ # Linux
40
+ sudo apt-get install -y clamav && sudo freshclam
41
+ ```
42
+
43
+ ---
44
+
45
+ ## Enabling TCP mode
46
+
47
+ Pass `host` (and optionally `port`) to any scan function:
48
+
49
+ ```js
50
+ const result = await scan('/uploads/file.pdf', {
51
+ host: '127.0.0.1',
52
+ port: 3310, // default 3310
53
+ timeout: 30_000, // socket idle timeout ms, default 15000
54
+ });
55
+ ```
56
+
57
+ Setting `host` switches all four functions — `scan`, `scanBuffer`, `scanStream`, `scanDirectory` — to TCP mode.
58
+
59
+ ---
60
+
61
+ ## How local mode works
62
+
63
+ ```
64
+ pompelmi OS
65
+ │ │
66
+ ├── spawn clamscan ─┤
67
+ │ ├── load virus DB (~300 MB) into memory
68
+ │ ├── scan file
69
+ │ ├── exit 0 / 1 / 2
70
+ ├── read exit code ─┘
71
+
72
+ └── resolve Verdict
73
+ ```
74
+
75
+ Each scan call:
76
+ 1. Spawns a new `clamscan` process
77
+ 2. `clamscan` loads the full virus database into memory
78
+ 3. Scans the file
79
+ 4. Exits with code 0 (clean), 1 (malicious), or 2 (error)
80
+ 5. pompelmi maps the exit code to a Verdict Symbol
81
+
82
+ **Typical latency:** 400–800 ms per scan (dominated by database load time).
83
+
84
+ ---
85
+
86
+ ## How TCP mode works
87
+
88
+ ```
89
+ pompelmi clamd daemon
90
+ │ │
91
+ ├── TCP connect ───►│ (keep-alive daemon)
92
+ ├── INSTREAM ───────┤
93
+ ├── stream chunks ──┤ scan in memory
94
+ │ ├── "stream: OK" / "stream: X FOUND"
95
+ ├── read response ──┘
96
+
97
+ └── resolve Verdict
98
+ ```
99
+
100
+ Each scan call:
101
+ 1. Opens a TCP connection to clamd
102
+ 2. Sends `zINSTREAM\0` command
103
+ 3. Streams the file in 64 KB chunks, each prefixed with a 4-byte big-endian length header
104
+ 4. Sends 4 zero bytes to signal end of stream
105
+ 5. Reads the response line
106
+ 6. Maps response to Verdict Symbol
107
+
108
+ **Typical latency:** 5–50 ms per scan (clamd keeps DB in memory; network is the bottleneck).
109
+
110
+ ---
111
+
112
+ ## Performance comparison
113
+
114
+ | Metric | Local mode | TCP mode |
115
+ |--------|-----------|----------|
116
+ | First scan latency | ~600 ms | ~10 ms |
117
+ | Subsequent scan latency | ~600 ms | ~10 ms |
118
+ | Concurrent scans (4-core) | ~4 (CPU-bound) | ~50+ |
119
+ | Memory per scan | ~300 MB (DB load) | ~0 (clamd holds DB) |
120
+
121
+ Local mode is fine for low-traffic applications (< 10 uploads/minute). TCP mode is required for any sustained upload throughput.
122
+
123
+ ---
124
+
125
+ ## Switching modes without changing application code
126
+
127
+ Structure your options from environment variables so the same code runs in local mode during development and TCP mode in production:
128
+
129
+ ```js
130
+ const SCAN_OPTS = process.env.CLAMAV_HOST
131
+ ? {
132
+ host: process.env.CLAMAV_HOST,
133
+ port: Number(process.env.CLAMAV_PORT) || 3310,
134
+ timeout: Number(process.env.CLAMAV_TIMEOUT) || 15_000,
135
+ }
136
+ : {}; // local mode — empty options
137
+
138
+ const result = await scan('/uploads/file.pdf', SCAN_OPTS);
139
+ ```
140
+
141
+ Set `CLAMAV_HOST=clamav` in your Docker environment; leave it unset in local development.
142
+
143
+ ---
144
+
145
+ ## Timeout differences
146
+
147
+ | | Local mode | TCP mode |
148
+ |---|---|---|
149
+ | **`timeout` option** | Ignored | Socket idle timeout in ms |
150
+ | **Default timeout** | None — runs until `clamscan` finishes or the OS kills it | 15 000 ms |
151
+ | **Timeout error** | `Process killed by signal: SIGTERM` | `clamd connection timed out after Nms` |
152
+
153
+ In local mode, the process runs until `clamscan` finishes or the OS kills it. In TCP mode, pompelmi sets a socket idle timeout — if clamd stops sending data for longer than `timeout` ms, the connection is closed and the promise rejects.
154
+
155
+ ---
156
+
157
+ ## Error behaviour differences
158
+
159
+ | Condition | Local mode error | TCP mode error |
160
+ |-----------|-----------------|----------------|
161
+ | Service unavailable | `ENOENT` (clamscan not found) | `ECONNREFUSED` |
162
+ | Service slow | Process runs to completion | `clamd connection timed out` |
163
+ | File not scannable | `Verdict.ScanError` (exit code 2) | `Verdict.ScanError` (error response) |
164
+
165
+ ---
166
+
167
+ ## When to use local mode
168
+
169
+ - Development and testing on a developer's machine
170
+ - Low-traffic applications (a few uploads per minute at most)
171
+ - Environments where Docker is unavailable
172
+ - Simple scripts and one-off scans
173
+
174
+ ## When to use TCP mode
175
+
176
+ - Production applications with concurrent uploads
177
+ - Docker or Kubernetes deployments
178
+ - Scanning in-memory buffers or streams with zero disk I/O
179
+ - Environments where the application container cannot install ClamAV