npm - pompelmi - Versions diffs - 1.5.0 → 1.7.0 - Mend

pompelmi 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/README.md +113 -195
package/action/Dockerfile +24 -0
package/action/entrypoint.sh +23 -0
package/action/scanner.js +89 -0
package/action.yml +29 -0
package/llms.txt +22 -99
package/package.json +1 -1
package/pr_info.tmp +2 -0
package/release-notes-v1.4.0.md +25 -0
package/release-notes-v1.5.0.md +37 -0
package/src/BufferScanner.js +20 -17
package/src/ClamAVScanner.js +4 -4
package/src/ClamdScanner.js +18 -15
package/src/StreamScanner.js +20 -17
package/wiki/api-reference.md +268 -0
package/wiki/cli-usage.md +263 -0
package/wiki/concurrent-scanning.md +199 -0
package/wiki/docker-compose-production.md +190 -0
package/wiki/docker-setup.md +178 -0
package/wiki/error-handling.md +242 -0
package/wiki/express-integration.md +227 -0
package/wiki/fastify-integration.md +207 -0
package/wiki/home.md +0 -0
package/wiki/local-vs-tcp-mode.md +179 -0
package/wiki/multer-memory-storage.md +166 -0
package/wiki/nestjs-integration.md +228 -0
package/wiki/nextjs-integration.md +209 -0
package/wiki/performance.md +178 -0
package/wiki/quarantine-workflow.md +260 -0
package/wiki/rest-api-server.md +297 -0
package/wiki/s3-integration.md +233 -0
package/wiki/security-considerations.md +192 -0
package/wiki/typescript-usage.md +239 -0
package/wiki/verdicts.md +192 -0
package/wiki/virus-definitions.md +194 -0

package/wiki/express-integration.md ADDED Viewed

@@ -0,0 +1,227 @@
+# Express Integration
+This page covers integrating pompelmi into an Express application for file upload scanning — both disk storage (scan by path) and memory storage (scan by buffer).
+---
+## Setup
+```bash
+npm install pompelmi multer express
+```
+---
+## Disk storage (scan by file path)
+multer writes the uploaded file to disk before your route handler runs. Call `scan(req.file.path)` to scan it.
+```js
+const express = require('express');
+const multer  = require('multer');
+const fs      = require('fs');
+const path    = require('path');
+const { scan, Verdict } = require('pompelmi');
+const upload = multer({ dest: path.join(__dirname, 'uploads') });
+const app    = express();
+app.post('/upload', upload.single('file'), async (req, res) => {
+  if (!req.file) {
+    return res.status(400).json({ error: 'No file uploaded.' });
+  }
+  const filePath = req.file.path;
+  try {
+    const result = await scan(filePath);
+    if (result === Verdict.Malicious) {
+      fs.unlinkSync(filePath);
+      return res.status(422).json({ error: 'Malicious file rejected.' });
+    }
+    if (result === Verdict.ScanError) {
+      fs.unlinkSync(filePath);
+      return res.status(422).json({ error: 'Scan incomplete — file rejected as precaution.' });
+    }
+    // Verdict.Clean — rename to final destination or store as-is
+    return res.json({ ok: true, filename: req.file.filename });
+  } catch (err) {
+    // clamscan not in PATH, file not found, process killed, etc.
+    try { fs.unlinkSync(filePath); } catch {}
+    return res.status(500).json({ error: `Scan failed: ${err.message}` });
+  }
+});
+app.listen(3000);
+```
+### With TCP mode (Docker sidecar)
+```js
+const SCAN_OPTS = {
+  host: process.env.CLAMAV_HOST || '127.0.0.1',
+  port: Number(process.env.CLAMAV_PORT) || 3310,
+  timeout: 30_000,
+};
+const result = await scan(filePath, SCAN_OPTS);
+```
+---
+## Memory storage (scan by buffer)
+When you use `multer({ storage: multer.memoryStorage() })`, the file is never written to disk — it lives in `req.file.buffer`. Use `scanBuffer()` instead of `scan()`.
+```js
+const express = require('express');
+const multer  = require('multer');
+const { scanBuffer, Verdict } = require('pompelmi');
+const upload = multer({ storage: multer.memoryStorage() });
+const app    = express();
+const SCAN_OPTS = {
+  host: process.env.CLAMAV_HOST,
+  port: 3310,
+};
+app.post('/upload', upload.single('file'), async (req, res) => {
+  if (!req.file) {
+    return res.status(400).json({ error: 'No file uploaded.' });
+  }
+  try {
+    const result = await scanBuffer(req.file.buffer, SCAN_OPTS);
+    if (result === Verdict.Malicious) {
+      return res.status(422).json({ error: 'Malicious file rejected.' });
+    }
+    if (result === Verdict.ScanError) {
+      return res.status(422).json({ error: 'Scan incomplete — file rejected.' });
+    }
+    // Clean — forward buffer to storage (S3, database, disk)
+    return res.json({ ok: true, originalname: req.file.originalname });
+  } catch (err) {
+    return res.status(500).json({ error: `Scan failed: ${err.message}` });
+  }
+});
+app.listen(3000);
+```
+---
+## Scanning multiple files in one request
+```js
+app.post('/upload-many', upload.array('files', 10), async (req, res) => {
+  if (!req.files || req.files.length === 0) {
+    return res.status(400).json({ error: 'No files uploaded.' });
+  }
+  const results = await Promise.allSettled(
+    req.files.map(async (file) => {
+      const verdict = await scan(file.path, SCAN_OPTS);
+      return { file, verdict };
+    })
+  );
+  const rejected = [];
+  const accepted = [];
+  for (const r of results) {
+    if (r.status === 'rejected') {
+      rejected.push({ filename: '?', reason: r.reason.message });
+      continue;
+    }
+    const { file, verdict } = r.value;
+    if (verdict !== Verdict.Clean) {
+      try { fs.unlinkSync(file.path); } catch {}
+      rejected.push({ filename: file.originalname, reason: verdict.description });
+    } else {
+      accepted.push(file.originalname);
+    }
+  }
+  if (rejected.length > 0) {
+    return res.status(422).json({ accepted, rejected });
+  }
+  return res.json({ ok: true, accepted });
+});
+```
+---
+## Centralised error handling middleware
+Extract scan logic into middleware for reuse across routes:
+```js
+async function scanUpload(req, res, next) {
+  if (!req.file) return next();
+  const filePath = req.file.path;
+  try {
+    const result = await scan(filePath, SCAN_OPTS);
+    if (result !== Verdict.Clean) {
+      try { fs.unlinkSync(filePath); } catch {}
+      const status = 422; // Malicious and ScanError are both rejected with 422
+      return res.status(status).json({ error: `Upload rejected: ${result.description}` });
+    }
+    next();
+  } catch (err) {
+    try { fs.unlinkSync(filePath); } catch {}
+    next(err);
+  }
+}
+// Use it
+app.post('/profile-photo', upload.single('photo'), scanUpload, (req, res) => {
+  res.json({ ok: true, path: req.file.path });
+});
+```
+---
+## HTTP status codes
+| Situation | Status |
+|-----------|--------|
+| No file in request | `400 Bad Request` |
+| `Verdict.Malicious` | `422 Unprocessable Entity` |
+| `Verdict.ScanError` | `422 Unprocessable Entity` |
+| `scan()` throws | `500 Internal Server Error` |
+| `Verdict.Clean` | `200 OK` (or `201 Created` after storing) |
+---
+## File size limits
+Always set a file size limit on multer to prevent large uploads from exhausting memory or disk:
+```js
+const upload = multer({
+  dest: './uploads',
+  limits: { fileSize: 10 * 1024 * 1024 }, // 10 MB
+});
+```
+multer raises a `MulterError` (a subclass of `Error`) and passes it to Express's error-handling chain when the limit is exceeded. Handle it in your error middleware:
+```js
+app.use((err, req, res, next) => {
+  if (err.code === 'LIMIT_FILE_SIZE') {
+    return res.status(413).json({ error: 'File too large.' });
+  }
+  next(err);
+});
+```

package/wiki/fastify-integration.md ADDED Viewed

@@ -0,0 +1,207 @@
+# Fastify Integration
+Complete guide to integrating pompelmi into a Fastify application. Covers disk-based scanning, stream-based scanning with `scanStream()`, and error handling.
+---
+## Setup
+```bash
+npm install pompelmi fastify @fastify/multipart
+```
+---
+## Disk-based scanning (save to disk first, then scan)
+This pattern saves the uploaded file to disk via `pipeline`, then scans it by path. It is the most straightforward approach for large files.
+```js
+const Fastify   = require('fastify');
+const { pipeline } = require('stream/promises');
+const fs        = require('fs');
+const path      = require('path');
+const { scan, Verdict } = require('pompelmi');
+const app = Fastify({ logger: true });
+app.register(require('@fastify/multipart'));
+const SCAN_OPTS = {
+  host: process.env.CLAMAV_HOST,
+  port: Number(process.env.CLAMAV_PORT) || 3310,
+  timeout: 30_000,
+};
+app.post('/upload', async (req, reply) => {
+  const data     = await req.file();
+  const filePath = path.join('./uploads', `${Date.now()}-${data.filename}`);
+  // Write to disk
+  await pipeline(data.file, fs.createWriteStream(filePath));
+  let result;
+  try {
+    result = await scan(filePath, SCAN_OPTS);
+  } catch (err) {
+    try { fs.unlinkSync(filePath); } catch {}
+    return reply.code(500).send({ error: `Scan failed: ${err.message}` });
+  }
+  if (result !== Verdict.Clean) {
+    try { fs.unlinkSync(filePath); } catch {}
+    return reply.code(422).send({ error: `Upload rejected: ${result.description}` });
+  }
+  return reply.send({ ok: true, filename: path.basename(filePath) });
+});
+app.listen({ port: 3000 });
+```
+---
+## Stream-based scanning (no disk I/O in TCP mode)
+When using TCP mode (clamd sidecar), pass the upload stream directly to `scanStream()`. The file never touches the application host's disk.
+```js
+const Fastify = require('fastify');
+const { scanStream, Verdict } = require('pompelmi');
+const app = Fastify({ logger: true });
+app.register(require('@fastify/multipart'));
+const SCAN_OPTS = {
+  host: process.env.CLAMAV_HOST || 'clamav',
+  port: 3310,
+  timeout: 30_000,
+};
+app.post('/upload', async (req, reply) => {
+  const data = await req.file();
+  let result;
+  try {
+    result = await scanStream(data.file, SCAN_OPTS);
+  } catch (err) {
+    return reply.code(500).send({ error: `Scan failed: ${err.message}` });
+  }
+  if (result === Verdict.Malicious) {
+    return reply.code(422).send({ error: 'Malicious file rejected.' });
+  }
+  if (result === Verdict.ScanError) {
+    return reply.code(422).send({ error: 'Scan incomplete — file rejected.' });
+  }
+  // Stream is consumed — if you need to store the file you must re-read it.
+  // With stream scanning, save to storage (S3, disk) in a separate step
+  // using the original source, not data.file (already consumed).
+  return reply.send({ ok: true, filename: data.filename });
+});
+app.listen({ port: 3000 });
+```
+> **Note on stream consumption:** Once `scanStream()` consumes `data.file`, the stream is exhausted. If you need to store the file after scanning, either save it to disk first (disk-based pattern) or scan and upload to S3 in parallel using a passthrough stream.
+---
+## Scanning multiple files
+```js
+app.post('/upload-many', async (req, reply) => {
+  const parts   = req.files();
+  const results = [];
+  for await (const part of parts) {
+    const filePath = path.join('./uploads', `${Date.now()}-${part.filename}`);
+    await pipeline(part.file, fs.createWriteStream(filePath));
+    const verdict = await scan(filePath, SCAN_OPTS).catch(err => {
+      try { fs.unlinkSync(filePath); } catch {}
+      return null;
+    });
+    if (!verdict || verdict !== Verdict.Clean) {
+      try { fs.unlinkSync(filePath); } catch {}
+      results.push({ filename: part.filename, accepted: false, reason: verdict?.description ?? 'scan_error' });
+    } else {
+      results.push({ filename: part.filename, accepted: true });
+    }
+  }
+  const anyRejected = results.some(r => !r.accepted);
+  return reply.code(anyRejected ? 422 : 200).send({ results });
+});
+```
+---
+## Error handling with `setErrorHandler`
+Register a global error handler for unexpected failures:
+```js
+app.setErrorHandler((err, req, reply) => {
+  req.log.error(err);
+  reply.code(500).send({ error: 'Internal server error.' });
+});
+```
+For multer-equivalent size limits with `@fastify/multipart`:
+```js
+app.register(require('@fastify/multipart'), {
+  limits: {
+    fileSize: 10 * 1024 * 1024, // 10 MB
+  },
+});
+app.setErrorHandler((err, req, reply) => {
+  if (err.code === 'FST_REQ_FILE_TOO_LARGE') {
+    return reply.code(413).send({ error: 'File too large.' });
+  }
+  reply.code(500).send({ error: err.message });
+});
+```
+---
+## HTTP status codes
+| Situation | Status |
+|-----------|--------|
+| No file part in request | `400 Bad Request` |
+| `Verdict.Malicious` | `422 Unprocessable Entity` |
+| `Verdict.ScanError` | `422 Unprocessable Entity` |
+| `scan()` / `scanStream()` throws | `500 Internal Server Error` |
+| File too large | `413 Content Too Large` |
+| `Verdict.Clean` | `200 OK` |
+---
+## TypeScript
+```ts
+import Fastify from 'fastify';
+import multipart from '@fastify/multipart';
+import { scanStream, Verdict } from 'pompelmi';
+const app = Fastify();
+app.register(multipart);
+app.post('/upload', async (req, reply) => {
+  const data = await req.file();
+  if (!data) return reply.code(400).send({ error: 'No file.' });
+  const result = await scanStream(data.file, { host: 'clamav', port: 3310 });
+  if (result !== Verdict.Clean) {
+    return reply.code(422).send({ error: result.description });
+  }
+  return reply.send({ ok: true });
+});
+```

package/wiki/home.md ADDED Viewed

File without changes

package/wiki/local-vs-tcp-mode.md ADDED Viewed

@@ -0,0 +1,179 @@
+# Local vs TCP Mode
+pompelmi supports two scanning modes. Which one you use is controlled entirely by the options you pass — no configuration files, no environment flags in pompelmi itself.
+---
+## Summary
+| | Local mode | TCP mode |
+|---|---|---|
+| **How it works** | Spawns `clamscan` as a child process | Streams file to `clamd` daemon over TCP |
+| **ClamAV requirement** | `clamscan` binary in PATH | Running `clamd` daemon reachable over TCP |
+| **Startup time** | Slow — loads virus DB each invocation | Fast — daemon keeps DB in memory |
+| **Throughput** | Low — one process per scan | High — long-running daemon, no per-scan DB load |
+| **Disk I/O** | Reads file from disk | Reads file from disk with `scan()`; none with `scanBuffer()` / `scanStream()` |
+| **Docker** | Requires ClamAV in app container | Use ClamAV as a sidecar |
+| **Zero-copy scan** | Not possible | `scanBuffer()` and `scanStream()` with no disk I/O |
+---
+## Enabling local mode
+Do not pass `host` or `port`. pompelmi spawns `clamscan --no-summary <filePath>`:
+```js
+const { scan, Verdict } = require('pompelmi');
+// Local mode — no options, or empty options
+const result = await scan('/uploads/file.pdf');
+const sameResult = await scan('/uploads/file.pdf', {}); // equivalent: empty options
+```
+`clamscan` must be in `PATH`. Install it with:
+```bash
+# macOS
+brew install clamav && freshclam
+# Linux
+sudo apt-get install -y clamav && sudo freshclam
+```
+---
+## Enabling TCP mode
+Pass `host` (and optionally `port`) to any scan function:
+```js
+const result = await scan('/uploads/file.pdf', {
+  host:    '127.0.0.1',
+  port:    3310,           // default 3310
+  timeout: 30_000,         // socket idle timeout ms, default 15000
+});
+```
+Setting `host` switches all four functions — `scan`, `scanBuffer`, `scanStream`, `scanDirectory` — to TCP mode.
+---
+## How local mode works
+```
+pompelmi              OS
+   │                   │
+   ├── spawn clamscan ─┤
+   │                   ├── load virus DB (~300 MB) into memory
+   │                   ├── scan file
+   │                   ├── exit 0 / 1 / 2
+   ├── read exit code ─┘
+   │
+   └── resolve Verdict
+```
+Each scan call:
+1. Spawns a new `clamscan` process
+2. `clamscan` loads the full virus database into memory
+3. Scans the file
+4. Exits with code 0 (clean), 1 (malicious), or 2 (error)
+5. pompelmi maps the exit code to a Verdict Symbol
+**Typical latency:** 400–800 ms per scan (dominated by database load time).
+---
+## How TCP mode works
+```
+pompelmi              clamd daemon
+   │                   │
+   ├── TCP connect ───►│ (keep-alive daemon)
+   ├── INSTREAM ───────┤
+   ├── stream chunks ──┤ scan in memory
+   │                   ├── "stream: OK" / "stream: X FOUND"
+   ├── read response ──┘
+   │
+   └── resolve Verdict
+```
+Each scan call:
+1. Opens a TCP connection to clamd
+2. Sends `zINSTREAM\0` command
+3. Streams the file in 64 KB chunks, each prefixed with a 4-byte big-endian length header
+4. Sends 4 zero bytes to signal end of stream
+5. Reads the response line
+6. Maps response to Verdict Symbol
+**Typical latency:** 5–50 ms per scan (clamd keeps DB in memory; network is the bottleneck).
+---
+## Performance comparison
+| Metric | Local mode | TCP mode |
+|--------|-----------|----------|
+| First scan latency | ~600 ms | ~10 ms |
+| Subsequent scan latency | ~600 ms | ~10 ms |
+| Concurrent scans (4-core) | ~4 (CPU-bound) | ~50+ |
+| Memory per scan | ~300 MB (DB load) | ~0 (clamd holds DB) |
+Local mode is fine for low-traffic applications (< 10 uploads/minute). TCP mode is required for any sustained upload throughput.
+---
+## Switching modes without changing application code
+Structure your options from environment variables so the same code runs in local mode during development and TCP mode in production:
+```js
+const SCAN_OPTS = process.env.CLAMAV_HOST
+  ? {
+      host:    process.env.CLAMAV_HOST,
+      port:    Number(process.env.CLAMAV_PORT) || 3310,
+      timeout: Number(process.env.CLAMAV_TIMEOUT) || 15_000,
+    }
+  : {}; // local mode — empty options
+const result = await scan('/uploads/file.pdf', SCAN_OPTS);
+```
+Set `CLAMAV_HOST=clamav` in your Docker environment; leave it unset in local development.
+---
+## Timeout differences
+| | Local mode | TCP mode |
+|---|---|---|
+| **`timeout` option** | Ignored | Socket idle timeout in ms |
+| **Default timeout** | None — runs until `clamscan` exits | 15 000 ms |
+| **Timeout error** | `Process killed by signal: SIGTERM` | `clamd connection timed out after Nms` |
+In local mode, the process runs until `clamscan` finishes or the OS kills it. In TCP mode, pompelmi sets a socket idle timeout — if clamd stops sending data for longer than `timeout` ms, the connection is closed and the promise rejects.
+---
+## Error behaviour differences
+| Condition | Local mode error | TCP mode error |
+|-----------|-----------------|----------------|
+| Service unavailable | `ENOENT` (clamscan not found) | `ECONNREFUSED` |
+| Service slow | Process runs to completion | `clamd connection timed out` |
+| File not scannable | `Verdict.ScanError` (exit code 2) | `Verdict.ScanError` (error response) |
+---
+## When to use local mode
+- Development and testing on a developer's machine
+- Low-traffic applications (fewer than about 10 uploads per minute)
+- Environments where Docker is unavailable
+- Simple scripts and one-off scans
+## When to use TCP mode
+- Production applications with concurrent uploads
+- Docker or Kubernetes deployments
+- Scanning in-memory buffers or streams with zero disk I/O
+- Environments where the application container cannot install ClamAV