secure-file-check 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,420 @@
1
+ Secure File Check
2
+ =================
3
+
4
+ CLI + API for file type validation and suspicious Office detection
5
+
6
+ A lightweight utility to validate file types by extension + magic bytes and detect potentially malicious Office documents (e.g., VBA macros). Supports both CLI and programmatic API usage.
7
+
8
+ ---
9
+
10
+ Features
11
+
12
+ - Validate file content against extension using magic-byte detectors
13
+ - Detect suspicious Office documents (e.g., presence of VBA project)
14
+ - CLI with JSON output and audit logging
15
+ - Scan folders in parallel
16
+ - Ignore patterns for selective scanning
17
+ - Configurable whitelist of allowed file types
18
+ - Supported file types: png, jpg, jpeg, pdf, docx, xlsx, gif, bmp, tiff, mp3, wav, svg
19
+
20
+ ---
21
+
22
+ CLI Usage & Output
23
+
24
+ 1. Scan a single file
25
+
26
+ ```sh
27
+ node bin/cli.js --file=path/to/file
28
+ ```
29
+
30
+ Example stdout:
31
+
32
+ ```json
33
+ {"timestamp":"2026-03-24T09:49:49.386Z","file":"uploads/a.png","status":"valid","ext":"png","size":1024,"suspicious":false}
34
+ ```
35
+
36
+ - `status`: `valid` / `invalid`
37
+ - `suspicious`: `true` if suspicious Office file
38
+ - `size`: file size in bytes
39
+ - `timestamp`: scan time
40
+
41
+ ---
42
+
43
+ 2. Scan a folder
44
+
45
+ ```sh
46
+ node bin/cli.js --folder=uploads
47
+ ```
48
+
49
+ Example stdout (per-file JSON lines):
50
+
51
+ ```json
52
+ {"timestamp":"2026-03-24T09:49:49.405Z","file":"uploads/1.png","status":"valid","ext":"png","size":1024,"suspicious":false}
53
+ {"timestamp":"2026-03-24T09:49:49.406Z","file":"uploads/2.jpg","status":"valid","ext":"jpg","size":2048,"suspicious":false}
54
+ {"timestamp":"2026-03-24T09:49:49.407Z","file":"uploads/fake.docx","status":"invalid","ext":"docx","size":512,"reason":"Invalid ZIP (no EOCD)"}
55
+ ```
56
+
57
+ - One JSON object per line — easy to parse with CI/CD scripts or log aggregators
58
+ - `reason` appears when `status` = `invalid`
59
+
60
+ ---
61
+
62
+ 3. JSON output for folder scan
63
+
64
+ ```sh
65
+ node bin/cli.js --folder=uploads --json
66
+ ```
67
+
68
+ Example stdout:
69
+
70
+ ```json
71
+ [
72
+ {"timestamp":"2026-03-24T09:49:49.405Z","file":"uploads/1.png","status":"valid","ext":"png","size":1024,"suspicious":false},
73
+ {"timestamp":"2026-03-24T09:49:49.405Z","file":"uploads/2.jpg","status":"valid","ext":"jpg","size":2048,"suspicious":false}
74
+ ]
75
+ ```
76
+
77
+ - `--json` emits a single JSON array on stdout
78
+ - Audit log lines remain on `stderr`
79
+
80
+ Notes for CI
81
+
82
+ - By default (without `--json`) the CLI emits one JSON object per file on stdout (one line per file). This is streamable and CI-friendly for large scans.
83
+ - Audit/human-readable lines are written to `stderr` so tools can capture structured stdout separately.
84
+ - Use `--json` to get a single JSON array of all results on stdout.
85
+
86
+ ---
87
+
88
+ 4. Audit log
89
+
90
+ - The CLI writes structured JSON audit lines to `stderr`:
91
+
92
+ ```json
93
+ {"timestamp":"2026-03-24T09:49:49.386Z","file":"uploads/a.png","status":"valid"}
94
+ {"timestamp":"2026-03-24T09:49:49.390Z","file":"uploads/fake.png","status":"invalid","reason":"File content invalid"}
95
+ ```
96
+
97
+ - Use `--log=FILE` to append audit lines to a file
98
+ - Useful for CI/CD, SIEM integration, or auditing
99
+
100
+ ---
101
+
102
+ 5. Additional CLI options
103
+
104
+ ```
105
+ --file=PATH Scan single file
106
+ --folder=PATH Scan folder recursively
107
+ --ignore=PATTERN Ignore files/folders matching pattern
108
+ --allow=EXT1,EXT2 Only allow these extensions
109
+ --json Output JSON array (folder scan)
110
+ --log=FILE Write audit log to file
111
+ --parallel=N Parallel scans (default = CPU cores)
112
+ --fail-fast Exit immediately on first invalid file (exit code 1)
113
+ ```
114
+
115
+ ---
116
+
117
+ 6. Exit codes
118
+
119
+ - `0` — all files valid
120
+ - `1` — at least one file invalid or an error occurred
121
+
122
+ When using `--fail-fast`, the CLI exits immediately with code `1` on the first invalid file detected. Without `--fail-fast` the CLI runs all checks, prints a summary, and exits with `1` if any file was invalid.
123
+
124
+ ---
125
+
126
+ Allowed file types
127
+
128
+ The CLI and API both support restricting scans to an explicit whitelist of file extensions.
129
+
130
+ - Default allowed types (built-in): `png`, `jpg`, `jpeg`, `pdf`, `docx`, `xlsx`, `gif`, `bmp`, `tiff`, `mp3`, `wav`, `svg`.
131
+
132
+ CLI example (allow only images):
133
+
134
+ ```sh
135
+ node bin/cli.js --folder=uploads --allow=png,jpg,gif,svg
136
+ ```
137
+
138
+ Notes:
139
+ - Provide extensions as a comma-separated list (no leading dots). The CLI lowercases and matches against file extensions.
140
+ - If `--allow` is omitted the tool uses the built-in default list.
141
+
142
+ Programmatic API example:
143
+
144
+ ```js
145
+ await validateFile('/path/to/file.png', {
146
+ allowedTypes: ['png', 'jpg', 'svg'],
147
+ });
148
+ ```
149
+
150
+ The `allowedTypes` array should contain lowercase extensions without the leading dot. The `validateFile` function will reject files whose extension is not included in the whitelist.
151
+
152
+ ---
153
+
154
+ Example: React / Next.js integration
155
+
156
+ This example shows a Next.js API route that accepts a multipart file upload, saves it to a temporary path, calls `validateFile`, and returns the result. It uses `formidable` for parsing on the server — install it in your Next.js project if you follow this pattern.
157
+
158
+ pages/api/validate.js
159
+
160
+ ```js
161
+ import formidable from 'formidable';
162
+ import fs from 'fs/promises';
163
+ import { validateFile } from 'secure-file-check';
164
+
165
+ export const config = { api: { bodyParser: false } };
166
+
167
+ export default async function handler(req, res) {
168
+ const form = new formidable.IncomingForm({ uploadDir: '/tmp', keepExtensions: true });
169
+ let filepath;
170
+
171
+ try {
172
+ const { fields, files } = await new Promise((resolve, reject) => {
173
+ form.parse(req, (err, fields, files) => (err ? reject(err) : resolve({ fields, files })));
174
+ });
175
+
176
+ const file = files.file;
177
+ filepath = file?.filepath || file?.path;
178
+ if (!filepath) throw new Error('No uploaded file found');
179
+
180
+ const result = await validateFile(filepath);
181
+ res.json(result);
182
+ } catch (err) {
183
+ res.status(400).json({ error: err?.message || String(err) });
184
+ } finally {
185
+ if (filepath) await fs.unlink(filepath).catch(() => {});
186
+ }
187
+ }
188
+ ```
189
+
190
+ Client example (browser / Next.js page):
191
+
192
+ ```js
193
+ async function submitFile(file) {
194
+ const fd = new FormData();
195
+ fd.append('file', file);
196
+
197
+ const r = await fetch('/api/validate', { method: 'POST', body: fd });
198
+ const json = await r.json();
199
+ console.log('validation', json);
200
+ }
201
+ ```
202
+
203
+ Notes
204
+ - Keep uploads ephemeral (remove temp files after validation).
205
+ - Running `validateFile` from a Next.js API route requires access to the code (the `src/` folder) from the server runtime — this works in Node-based deployments (Vercel serverless functions may need adaptation).
206
+
207
+
208
+ Programmatic API
209
+
210
+ Use the exported `validateFile` function from `src/validator.js`:
211
+
212
+ ```js
213
+ import { validateFile } from './src/validator.js';
214
+
215
+ const res = await validateFile('/path/to/file', {
216
+ allowedTypes: ['png','jpg','pdf'],
217
+ maxSize: 10*1024*1024, // 10MB
218
+ detectSuspicious: true,
219
+ });
220
+
221
+ console.log(res);
222
+ // Example: { ext: 'png', size: 1024, suspicious: false }
223
+ ```
224
+
225
+ - `allowedTypes`: array of extensions to allow
226
+ - `maxSize`: maximum allowed size in bytes
227
+ - `detectSuspicious`: enable detection of suspicious Office files
228
+
229
+ ---
230
+
231
+ Key files
232
+
233
+ - `bin/cli.js` — CLI entrypoint
234
+ - `src/validator.js` — validation logic and allowed types
235
+ - `src/detectors.js` — magic-byte detectors for file types
236
+ - `src/zip.js` — parse Office zip entries for macros
237
+ - `src/logger.js` — audit logging (writes structured JSON to stderr)
238
+
239
+ ---
240
+
241
+ Testing
242
+
243
+ Run all tests:
244
+
245
+ ```sh
246
+ node --test
247
+ ```
248
+
249
+ Or run specific tests:
250
+
251
+ ```sh
252
+ node --test test/validator.test.js
253
+ node --test test/cli.test.js
254
+ ```
255
+
256
+ - Tests cover validator core and CLI behavior
257
+ - Temporary files are auto-cleaned after tests
258
+
259
+ ---
260
+
261
+ Contributing
262
+
263
+ Contributions and bug reports welcome. Please open issues or PRs with a clear reproduction and tests when appropriate.
264
+
265
+ ---
266
+
267
+ License
268
+
269
+ MIT
270
+
271
+ ---
272
+
273
+ Installation
274
+
275
+ Install the package (choose one):
276
+
277
+ ```sh
278
+ # npm (local)
279
+ npm install --save secure-file-check
280
+
281
+ # npm (global CLI)
282
+ npm install -g secure-file-check
283
+
284
+ # npx (run without installing globally)
285
+ npx secure-file-check --file=path/to/file
286
+
287
+ # yarn
288
+ yarn add secure-file-check
289
+ # yarn global (CLI)
290
+ yarn global add secure-file-check
291
+
292
+ # pnpm
293
+ pnpm add secure-file-check
294
+ # pnpm global (CLI)
295
+ pnpm add -g secure-file-check
296
+ ```
297
+
298
+ Programmatic usage after install (import from the package):
299
+
300
+ ```js
301
+ import { validateFile } from 'secure-file-check';
302
+
303
+ const res = await validateFile('/path/to/file');
304
+ ```
305
+
306
+ Publishing
307
+
308
+ To publish the package to npm (one-time setup may be required):
309
+
310
+ ```sh
311
+ # bump version in package.json, build if needed
312
+ npm publish --access public
313
+ ```
314
+
315
+ Notes:
316
+ - The package exposes the `validateFile` function as its module export.
317
+ - The CLI binary name is `secure-file-check` (installed via the `bin` field).
318
+
319
+ CI / GitHub Actions
320
+
321
+ Here's a minimal GitHub Actions workflow you can add at `.github/workflows/ci.yml`. It runs tests on PRs and pushes, and publishes to npm when a tag is pushed (requires `NPM_TOKEN` repo secret).
322
+
323
+ ```yaml
324
+ name: CI
325
+
326
+ on:
327
+ push:
328
+ branches: [main]
329
+ tags: ['v*']
330
+ pull_request:
331
+ branches: [main]
332
+
333
+ jobs:
334
+ test:
335
+ runs-on: ubuntu-latest
336
+ steps:
337
+ - uses: actions/checkout@v4
338
+ - uses: actions/setup-node@v4
339
+ with:
340
+ node-version: '18'
341
+ cache: 'npm'
342
+ - run: npm ci
343
+ - run: npm test
344
+
345
+ publish:
346
+ needs: test
347
+ runs-on: ubuntu-latest
348
+ if: startsWith(github.ref, 'refs/tags/v')
349
+ steps:
350
+ - uses: actions/checkout@v4
351
+ - uses: actions/setup-node@v4
352
+ with:
353
+ node-version: '18'
354
+ registry-url: 'https://registry.npmjs.org'
355
+ - run: npm ci
356
+ - run: npm publish --access public
357
+ env:
358
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
359
+ ```
360
+
361
+ Notes:
362
+ - The CLI writes structured per-file JSON to `stdout` and audit JSON lines to `stderr` — in CI you can capture `stdout` for structured analysis and save `stderr` for audit logs.
363
+ - Set `NPM_TOKEN` in repository secrets to enable publishing. Optionally use `GITHUB_TOKEN` and a release action to create GitHub releases.
364
+
365
+ Background scanning / cron
366
+
367
+ If you want an automated background scanner for an `uploads` folder (e.g., nightly or hourly), add the included helper script `scripts/scan-uploads.sh` to your server and schedule it with `cron` or systemd timers. The script writes a timestamped JSON results file and an audit log.
368
+
369
+ Example `cron` (runs hourly):
370
+
371
+ ```
372
+ # run hourly at :00
373
+ 0 * * * * /usr/bin/env bash /srv/app/secure-file-check/scripts/scan-uploads.sh /var/www/app/uploads /var/log/secure-file-check
374
+ ```
375
+
376
+ Example `systemd` unit + timer (optional)
377
+
378
+ `/etc/systemd/system/secure-file-check.service`:
379
+
380
+ ```
381
+ [Unit]
382
+ Description=Secure File Check scanner
383
+
384
+ [Service]
385
+ Type=oneshot
386
+ ExecStart=/usr/bin/env bash /srv/app/secure-file-check/scripts/scan-uploads.sh /var/www/app/uploads /var/log/secure-file-check
387
+ ```
388
+
389
+ `/etc/systemd/system/secure-file-check.timer`:
390
+
391
+ ```
392
+ [Unit]
393
+ Description=Run secure-file-check hourly
394
+
395
+ [Timer]
396
+ OnCalendar=hourly
397
+
398
+ [Install]
399
+ WantedBy=timers.target
400
+ ```
401
+
402
+ Then enable with:
403
+
404
+ ```sh
405
+ sudo systemctl enable --now secure-file-check.timer
406
+ ```
407
+
408
+ Docker sidecar / cron container
409
+
410
+ You can also run the scanner from a container that mounts your uploads directory and a host directory for logs:
411
+
412
+ ```sh
413
+ docker run --rm \
414
+ -v /var/www/app/uploads:/uploads:ro \
415
+ -v /var/log/secure-file-check:/scan-output \
416
+ -v /srv/app/secure-file-check:/workspace -w /workspace \
417
+ node:18 \
418
+ bash -c "npm ci && node bin/cli.js --folder=/uploads --json --log=/scan-output/scan.log"
418
+ ```
419
+
420
+
package/bin/cli.js ADDED
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env node
2
+ import fs from "fs/promises";
3
+ import path from "path";
4
+ import os from "os";
5
+ import { validateFile } from "../src/validator.js";
6
+ import { initLogger } from "../src/logger.js";
7
+
8
// ===== ARG PARSING =====
// Flags keep their full "--name=value" form; the main routine re-splits
// them on "=".
const args = process.argv.slice(2);

const fileArg = args.find((a) => a.startsWith("--file="));
const folderArg = args.find((a) => a.startsWith("--folder="));
const allowArg = args.find((a) => a.startsWith("--allow="));
const ignoreArg = args.find((a) => a.startsWith("--ignore="));
const logArg = args.find((a) => a.startsWith("--log="));
const jsonMode = args.includes("--json");
const failFast = args.includes("--fail-fast");
const parallelArg = args.find((a) => a.startsWith("--parallel="));

// ===== INIT LOGGER =====
initLogger({
  file: logArg ? logArg.split("=")[1] : null,
});

// ===== CONFIG =====
// Whitelisted extensions. Lowercased and trimmed so `--allow=PNG, Jpg`
// behaves as the README documents (the validator compares lowercase
// extensions); previously the raw values were passed through unchanged.
const allowedTypes = allowArg
  ? allowArg
      .split("=")[1]
      .split(",")
      .map((t) => t.trim().toLowerCase())
      .filter(Boolean)
  : undefined;

const ignorePatterns = ignoreArg
  ? ignoreArg.split("=")[1].split(",")
  : [];

// Worker count: parse with an explicit radix and fall back to the CPU count
// for non-numeric or non-positive values — `--parallel=0` (or garbage)
// previously produced zero workers, i.e. an empty scan that exited 0.
const parsedParallel = parallelArg
  ? Number.parseInt(parallelArg.split("=")[1], 10)
  : NaN;
const concurrency =
  Number.isFinite(parsedParallel) && parsedParallel > 0
    ? parsedParallel
    : os.cpus().length;
37
+
38
+ // ===== HELPERS =====
39
// True when the path contains any configured --ignore pattern
// (plain substring match, no globbing).
function isIgnored(filePath) {
  for (const pattern of ignorePatterns) {
    if (filePath.includes(pattern)) return true;
  }
  return false;
}
42
+
43
/**
 * Recursively collect file paths under `dir`, skipping anything matched by
 * the configured ignore patterns. Subdirectories are expanded in place, so
 * the result preserves directory-listing order.
 * @param {string} dir - folder to walk
 * @returns {Promise<string[]>} absolute/relative paths of all files found
 */
async function walkDir(dir) {
  const dirents = await fs.readdir(dir, { withFileTypes: true });
  const visible = dirents.filter((d) => !isIgnored(path.join(dir, d.name)));

  const expanded = await Promise.all(
    visible.map((d) => {
      const fullPath = path.join(dir, d.name);
      return d.isDirectory() ? walkDir(fullPath) : Promise.resolve([fullPath]);
    })
  );

  return expanded.flat();
}
61
+
62
/**
 * Run an array of async task factories with at most `limit` in flight.
 * Results are returned in task order.
 * @param {Array<() => Promise<any>>} tasks - task factories to invoke
 * @param {number} limit - maximum concurrent tasks
 * @returns {Promise<any[]>} results, positionally matching `tasks`
 */
async function runWithConcurrency(tasks, limit) {
  const results = [];
  let index = 0;

  // Guard: a non-positive or NaN limit would spawn zero workers and
  // silently return an empty result set; clamp to at least one worker.
  const workerCount = Math.max(
    1,
    Number.isFinite(limit) ? Math.floor(limit) : 1
  );

  async function worker() {
    while (index < tasks.length) {
      const i = index++;
      results[i] = await tasks[i]();
    }
  }

  await Promise.all(Array.from({ length: workerCount }, worker));
  return results;
}
76
+
77
// ===== MAIN =====
/**
 * CLI entrypoint: collect the target file list, validate each file with
 * bounded concurrency, emit per-file JSON (or a single array with --json),
 * and exit 0 only when every file validated cleanly.
 */
async function run() {
  let files = [];

  if (fileArg) {
    files = [fileArg.split("=")[1]];
  } else if (folderArg) {
    files = await walkDir(folderArg.split("=")[1]);
  } else {
    console.error("Usage: secure-file-check --file=path OR --folder=path");
    process.exit(1);
  }

  const tasks = files.map((file) => async () => {
    try {
      const res = await validateFile(file, { allowedTypes });

      const out = { file, status: "valid", ...res };
      if (!jsonMode) {
        // One JSON object per line on stdout for CI parsing.
        console.log(JSON.stringify(out));
      }

      return out;
    } catch (err) {
      // `reason` matches the output schema documented in the README;
      // `error` is kept for backward compatibility with existing consumers.
      const out = {
        file,
        status: "invalid",
        reason: err.message,
        error: err.message,
      };

      // Human-friendly audit line stays on stderr.
      console.error(`${file} -> ${err.message}`);

      if (!jsonMode) console.log(JSON.stringify(out));

      if (failFast) {
        // Immediate exit on first failure (useful in CI).
        process.exit(1);
      }

      return out;
    }
  });

  const results = await runWithConcurrency(tasks, concurrency);
  const hasError = results.some((r) => r.status === "invalid");

  if (jsonMode) {
    console.log(JSON.stringify(results, null, 2));
  } else {
    console.log("\n--- Summary ---");
    console.log(`Total: ${results.length}`);
    console.log(`Invalid: ${results.filter((r) => r.status === "invalid").length}`);
  }

  process.exit(hasError ? 1 : 0);
}

// Surface unexpected failures (e.g. an unreadable folder) as a clean exit
// instead of leaving a floating promise / unhandled rejection.
run().catch((err) => {
  console.error(err?.message || String(err));
  process.exit(1);
});
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "secure-file-check",
3
+ "description": "CLI + API for validating file types and detecting suspicious Office documents (VBA/macro detection).",
4
+ "version": "1.0.0",
5
+ "type": "module",
6
+ "main": "./src/validator.js",
7
+ "scripts": {
8
+ "test": "node --test",
9
+ "start": "node ./bin/cli.js",
10
+ "cli": "node ./bin/cli.js",
11
+ "prepublishOnly": "node --test"
12
+ },
13
+ "bin": {
14
+ "secure-file-check": "./bin/cli.js"
15
+ },
16
+ "exports": {
17
+ ".": "./src/validator.js"
18
+ },
19
+ "keywords": ["file","mime","security","upload","validation","cli","validator","vba","office"],
20
+ "author": "vietvq",
21
+ "files": ["bin/","src/","README.md"],
22
+ "engines": {
23
+ "node": ">=16"
24
+ },
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "git+https://github.com/vietvq/secure-file-check.git"
28
+ },
29
+ "bugs": {
30
+ "url": "https://github.com/vietvq/secure-file-check/issues"
31
+ },
32
+ "homepage": "https://github.com/vietvq/secure-file-check#readme",
33
+ "license": "MIT"
34
+ }
@@ -0,0 +1,69 @@
1
/**
 * Read `length` bytes from `fd` starting at byte offset `start`.
 *
 * Returns only the bytes actually read: near end-of-file the buffer is
 * truncated instead of zero-padded. Zero padding could fabricate signature
 * bytes — e.g. a 3-byte "II*" file previously gained the trailing 0x00
 * that completes the TIFF magic, producing a false positive.
 *
 * @param {import('fs/promises').FileHandle} fd - open read handle
 * @param {number} start - byte offset to read from
 * @param {number} length - maximum number of bytes to read
 * @returns {Promise<Buffer>} the bytes read (at most `length` long)
 */
export async function readChunk(fd, start, length) {
  const buffer = Buffer.alloc(length);
  const { bytesRead } = await fd.read(buffer, 0, length, start);
  return bytesRead === length ? buffer : buffer.subarray(0, bytesRead);
}
6
+
7
// PNG: eight-byte signature \x89PNG\r\n\x1a\n at the start of the file.
export async function isPNG(fd) {
  const signature = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
  const head = await readChunk(fd, 0, 8);
  return head.equals(signature);
}
11
+
12
// JPEG: SOI marker 0xFFD8 followed by a 0xFF marker byte.
export async function isJPEG(fd) {
  const [m0, m1, m2] = await readChunk(fd, 0, 3);
  return m0 === 0xff && m1 === 0xd8 && m2 === 0xff;
}
16
+
17
// PDF: the literal header "%PDF" opens the file.
export async function isPDF(fd) {
  const head = await readChunk(fd, 0, 4);
  return head.equals(Buffer.from("%PDF"));
}
21
+
22
// ZIP-family containers (including OOXML documents) begin with "PK".
export async function isZIP(fd) {
  const [b0, b1] = await readChunk(fd, 0, 4);
  return b0 === 0x50 && b1 === 0x4b;
}
26
+
27
// GIF: one of the two six-byte version signatures.
export async function isGIF(fd) {
  const magic = (await readChunk(fd, 0, 6)).toString('ascii');
  return ['GIF87a', 'GIF89a'].includes(magic);
}
32
+
33
// BMP: the ASCII pair "BM" opens the file.
export async function isBMP(fd) {
  const [first, second] = await readChunk(fd, 0, 2);
  return first === 0x42 && second === 0x4d;
}
37
+
38
// TIFF: either byte-order variant of the four-byte magic.
export async function isTIFF(fd) {
  const b = await readChunk(fd, 0, 4);
  const littleEndian =
    b[0] === 0x49 && b[1] === 0x49 && b[2] === 0x2a && b[3] === 0x00; // "II*\0"
  const bigEndian =
    b[0] === 0x4d && b[1] === 0x4d && b[2] === 0x00 && b[3] === 0x2a; // "MM\0*"
  return littleEndian || bigEndian;
}
46
+
47
/**
 * MP3: either an ID3v2 tag ("ID3") or an MPEG frame-sync (0xFF followed by
 * a byte whose top three bits are set).
 *
 * The frame-sync check reuses the bytes already read; the original issued a
 * redundant second read of the same file region.
 */
export async function isMP3(fd) {
  const b = await readChunk(fd, 0, 3);
  if (b[0] === 0x49 && b[1] === 0x44 && b[2] === 0x33) return true; // 'ID3'
  return b[0] === 0xff && (b[1] & 0xe0) === 0xe0;
}
54
+
55
// WAV: RIFF container whose format tag is WAVE —
// "RIFF" <4-byte chunk size> "WAVE".
export async function isWAV(fd) {
  const head = await readChunk(fd, 0, 12);
  const hasRiff = head.subarray(0, 4).toString('ascii') === 'RIFF';
  const hasWave = head.subarray(8, 12).toString('ascii') === 'WAVE';
  return hasRiff && hasWave;
}
63
+
64
// SVG heuristic: an "<svg" tag (any case) within the first 512 bytes.
export async function isSVG(fd) {
  const head = await readChunk(fd, 0, 512);
  return head.toString('utf8').toLowerCase().includes('<svg');
}
package/src/logger.js ADDED
@@ -0,0 +1,28 @@
1
+ import fs from "fs/promises";
2
+
3
// Path of the optional audit-log file; null disables file output.
let logFilePath = null;

/**
 * Configure the audit logger.
 * @param {{file?: string|null}} [options] - `file`: path to append audit lines to.
 */
export function initLogger(options = {}) {
  logFilePath = options.file || null;
}

/**
 * Emit one structured audit record: always to stderr, and appended to the
 * configured log file when one was set via initLogger().
 * @param {object} entry - fields to record; a `timestamp` is added automatically
 */
export async function auditLog(entry) {
  const record = {
    timestamp: new Date().toISOString(),
    ...entry,
  };
  const line = `${JSON.stringify(record)}\n`;

  // stderr keeps structured stdout (e.g. --json output) clean.
  console.error(line.trim());

  if (logFilePath) {
    try {
      await fs.appendFile(logFilePath, line);
    } catch (err) {
      // Best-effort file logging: report the failure but never crash a scan.
      console.error("Logger error:", err.message);
    }
  }
}
@@ -0,0 +1,165 @@
1
+ import fs from "fs/promises";
2
+ import path from "path";
3
+ import {
4
+ isPNG,
5
+ isJPEG,
6
+ isPDF,
7
+ isZIP,
8
+ isGIF,
9
+ isBMP,
10
+ isTIFF,
11
+ isMP3,
12
+ isWAV,
13
+ isSVG,
14
+ } from "./detectors.js";
15
+ import { parseZipEntries, validateOffice } from "./zip.js";
16
+ import { auditLog } from "./logger.js";
17
+
18
/**
 * Validate a file's content against its extension.
 *
 * Checks, in order: extension whitelist, size limit, then magic-byte
 * content detection. Extensions without a content detector are accepted by
 * extension alone (so users can whitelist arbitrary types, e.g.
 * `allowedTypes: ['js']`). Office documents (docx/xlsx) additionally have
 * their ZIP layout verified and are flagged `suspicious` when a
 * vbaProject.bin entry (embedded VBA macro project) is present.
 *
 * @param {string} filePath - path of the file to validate
 * @param {object} [options]
 * @param {string[]} [options.allowedTypes] - lowercase extensions (no dot)
 * @param {number} [options.maxSize=52428800] - maximum size in bytes (50 MB)
 * @param {boolean} [options.detectSuspicious=true] - scan Office files for macros
 * @returns {Promise<{ext: string, size: number, suspicious: boolean}>}
 * @throws {Error} when the extension is not allowed, the file is too large,
 *   or the content does not match the extension; every failure is audited.
 */
export async function validateFile(filePath, options = {}) {
  const {
    allowedTypes = [
      "png",
      "jpg",
      "jpeg",
      "pdf",
      "docx",
      "xlsx",
      "gif",
      "bmp",
      "tiff",
      "mp3",
      "wav",
      "svg",
    ],
    maxSize = 50 * 1024 * 1024,
    detectSuspicious = true,
  } = options;

  const ext = path.extname(filePath).slice(1).toLowerCase();

  // NOTE(review): stat() failures (missing file, permissions) propagate
  // before the try block, so they are not audited — size is unknown there.
  const stat = await fs.stat(filePath);

  try {
    if (!allowedTypes.includes(ext)) {
      throw new Error(`Extension .${ext} not allowed`);
    }

    if (stat.size > maxSize) {
      throw new Error("File too large");
    }

    const fd = await fs.open(filePath, "r");

    let valid = false;
    let suspicious = false;

    // Extensions with a content detector; anything else on the whitelist is
    // accepted by extension alone (e.g. `--allow=js`).
    const detectorHandled = new Set([
      "png",
      "jpg",
      "jpeg",
      "pdf",
      "gif",
      "bmp",
      "tiff",
      "tif",
      "mp3",
      "wav",
      "svg",
      "docx",
      "xlsx",
    ]);

    try {
      switch (ext) {
        case "png":
          valid = await isPNG(fd);
          break;

        case "jpg":
        case "jpeg":
          valid = await isJPEG(fd);
          break;

        case "pdf":
          valid = await isPDF(fd);
          break;

        case "gif":
          valid = await isGIF(fd);
          break;

        case "bmp":
          valid = await isBMP(fd);
          break;

        case "tiff":
        case "tif":
          valid = await isTIFF(fd);
          break;

        case "mp3":
          valid = await isMP3(fd);
          break;

        case "wav":
          valid = await isWAV(fd);
          break;

        case "svg":
          valid = await isSVG(fd);
          break;

        case "docx":
        case "xlsx": {
          // Braced block: the original declared `const entries` directly in
          // the case, leaking the binding across the whole switch
          // (no-case-declarations).
          if (!(await isZIP(fd))) break;

          const entries = await parseZipEntries(fd, stat.size);
          valid = validateOffice(entries, ext);

          if (detectSuspicious) {
            // A vbaProject.bin entry indicates an embedded VBA macro project.
            suspicious = entries.some((e) =>
              e.toLowerCase().includes("vbaproject.bin")
            );
          }
          break;
        }
      }

      if (!valid && !detectorHandled.has(ext)) {
        valid = true;
      }
    } finally {
      await fd.close();
    }

    if (!valid) {
      throw new Error("File content invalid");
    }

    await auditLog({
      file: filePath,
      status: "valid",
      ext,
      size: stat.size,
      suspicious,
    });

    return {
      ext,
      size: stat.size,
      suspicious,
    };
  } catch (err) {
    await auditLog({
      file: filePath,
      status: "invalid",
      ext,
      size: stat.size,
      reason: err.message,
    });

    throw err;
  }
}
package/src/zip.js ADDED
@@ -0,0 +1,53 @@
1
/**
 * Read ZIP central-directory entry names from an open file handle.
 *
 * Locates the End Of Central Directory (EOCD) record within the final
 * 64 KiB, then walks central-directory file headers (signature PK\x01\x02)
 * from the recorded offset, collecting entry names. Only the first 4 KiB
 * of the central directory are scanned, which covers Office documents.
 *
 * @param {import('fs/promises').FileHandle} fd - open read handle
 * @param {number} fileSize - total file size in bytes
 * @returns {Promise<string[]>} central-directory entry names
 * @throws {Error} when no EOCD record is found (not a ZIP)
 */
export async function parseZipEntries(fd, fileSize) {
  const readSize = Math.min(65536, fileSize);
  const tail = Buffer.alloc(readSize);

  await fd.read(tail, 0, readSize, fileSize - readSize);

  const eocdSig = Buffer.from([0x50, 0x4b, 0x05, 0x06]);
  const idx = tail.lastIndexOf(eocdSig);

  if (idx === -1) {
    throw new Error("Invalid ZIP (no EOCD)");
  }

  // Central-directory start offset lives 16 bytes into the EOCD record.
  const centralDirOffset = tail.readUInt32LE(idx + 16);
  const header = Buffer.alloc(4096);

  await fd.read(header, 0, 4096, centralDirOffset);

  const entries = [];
  let i = 0;

  while (i < header.length - 46) {
    // Require the full central-directory header signature PK\x01\x02.
    // The original checked only "PK", which also matched the trailing EOCD
    // record (PK\x05\x06) and pushed a bogus entry parsed from garbage
    // offsets.
    if (
      header[i] !== 0x50 ||
      header[i + 1] !== 0x4b ||
      header[i + 2] !== 0x01 ||
      header[i + 3] !== 0x02
    ) {
      break;
    }

    const fileNameLength = header.readUInt16LE(i + 28);
    const extraLength = header.readUInt16LE(i + 30);
    const commentLength = header.readUInt16LE(i + 32);

    const nameStart = i + 46;
    const nameEnd = nameStart + fileNameLength;

    entries.push(header.subarray(nameStart, nameEnd).toString("utf-8"));

    i = nameEnd + extraLength + commentLength;
  }

  return entries;
}
40
+
41
/**
 * Check that ZIP entry names look like a well-formed Office document.
 * @param {string[]} entries - central-directory entry names
 * @param {string} type - 'docx' or 'xlsx'; anything else is rejected
 * @returns {boolean} true when the package has the expected layout
 */
export function validateOffice(entries, type) {
  // Every OOXML package must declare its content types.
  if (!entries.includes("[Content_Types].xml")) return false;

  // Each supported document type stores its parts under a known prefix.
  const requiredPrefix = { docx: "word/", xlsx: "xl/" }[type];
  if (!requiredPrefix) return false;

  return entries.some((entry) => entry.startsWith(requiredPrefix));
}