pompelmi 0.0.1-alpha.0 → 0.2.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 pompelmi
3
+ Copyright (c) 2025 Tommaso Bertocchi
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
package/README.md CHANGED
@@ -1,19 +1,364 @@
1
- # pompelmi (alpha)
1
+ <p align="center">
2
+ <a href="https://github.com/pompelmi/pompelmi" target="_blank" rel="noopener noreferrer">
3
+ <img
4
+ src="https://raw.githubusercontent.com/pompelmi/pompelmi/refs/heads/main/assets/logo.svg"
5
+ alt="pompelmi"
6
+ width="120"
7
+ height="120"
8
+ />
9
+ </a>
10
+ </p>
2
11
 
3
- Placeholder release per bloccare il nome su npm.
12
+
13
+ <h1 align="center">pompelmi</h1>
14
+
15
+ <p align="center">
16
+ Lightweight file scanner with optional <strong>YARA</strong> integration.<br/>
17
+ Works out-of-the-box in <strong>Node.js</strong>; supports <strong>browser</strong> via an HTTP remote engine.
18
+ </p>
19
+
20
+ <p align="center">
21
+ <a href="https://www.npmjs.com/package/pompelmi">
22
+ <img alt="npm" src="https://img.shields.io/npm/v/pompelmi?label=pompelmi">
23
+ </a>
24
+ <a href="https://www.npmjs.com/package/pompelmi">
25
+ <img alt="downloads" src="https://img.shields.io/npm/dw/pompelmi">
26
+ </a>
27
+ <a href="https://github.com/pompelmi/pompelmi/blob/main/LICENSE">
28
+ <img alt="license" src="https://img.shields.io/npm/l/pompelmi">
29
+ </a>
30
+ <img alt="node" src="https://img.shields.io/node/v/pompelmi">
31
+ <img alt="types" src="https://img.shields.io/badge/types-TypeScript-3178C6?logo=typescript&logoColor=white">
32
+ <img alt="status" src="https://img.shields.io/badge/channel-alpha-orange">
33
+ </p>
34
+
35
+ <p align="center">
36
+ <a href="#features">Features</a> •
37
+ <a href="#install">Install</a> •
38
+ <a href="#quickstart">Quickstart</a> •
39
+ <a href="#api">API</a> •
40
+ <a href="#browser-remote-yara">Browser (Remote YARA)</a> •
41
+ <a href="#examples">Examples</a> •
42
+ <a href="#faq">FAQ</a> •
43
+ <a href="#contributing">Contributing</a> •
44
+ <a href="#license">License</a>
45
+ </p>
46
+
47
+ ---
48
+
49
+ ## Features
50
+
51
+ - **Node.js first**: recursive directory scanning with **YARA** (no brew/apt required).
52
+ - **Flexible YARA rules**: from `.yar` file or inline string.
53
+ - **Smart scanning path**:
54
+ - `scanFileAsync` → `scanFile` → `scan(buffer)` (with optional **sampling** of the first N bytes).
55
+ - **Policies & filters**:
56
+ - include extensions, max file size, buffer-only mode, async preference, sampling bytes.
57
+ - **Structured results** per file:
58
+ - `matches`, `status`, `reason`, `mode`, derived **`verdict`**: `malicious | suspicious | clean`.
59
+ - **Browser support** via **Remote Engine** (HTTP endpoint):
60
+ - `multipart` or `json-base64` (with `rulesB64` to avoid JSON escaping headaches).
61
+ - **TypeScript** types included. ESM & CJS builds, tree-shake friendly.
62
+
63
+ ---
4
64
 
5
65
  ## Install
6
66
 
7
67
  ```bash
8
- npm i -g pompelmi@alpha
68
+ # library
69
+ npm i pompelmi
70
+
71
+ # (dev) scripts / server example might use these
72
+ npm i -D tsx express multer cors
73
+ ```
74
+
75
+ > The Node YARA engine uses native binaries via platform packages (pulled automatically by dependencies). **No brew / apt** required for consumers.
76
+
77
+ ---
78
+
79
+ ## Quickstart
80
+
81
+ ### Node.js (scan a folder with YARA)
82
+
83
+ ```ts
84
+ import { scanDir } from 'pompelmi';
85
+ import { resolve } from 'node:path';
86
+
87
+ const opts = {
88
+ enableYara: true,
89
+ yaraRulesPath: resolve(process.cwd(), 'rules/demo.yar'),
90
+ // optional policies
91
+ includeExtensions: ['.txt', '.bin'],
92
+ maxFileSizeBytes: 10 * 1024 * 1024, // 10 MiB
93
+ yaraAsync: true,
94
+ };
95
+
96
+ for await (const entry of scanDir('./some-folder', opts)) {
97
+ // entry: { path, absPath, isDir, yara? }
98
+ console.log(entry.path, entry.yara);
99
+ }
100
+ ```
101
+
102
+ ### Browser (HTTP remote engine, no WASM)
103
+
104
+ ```ts
105
+ import { createRemoteEngine } from 'pompelmi';
106
+
107
+ const RULES = `
108
+ rule demo_contains_virus_literal {
109
+ strings: $a = "virus" ascii nocase
110
+ condition: $a
111
+ }
112
+ `;
113
+
114
+ async function scanFileInBrowser(file: File) {
115
+ const engine = await createRemoteEngine({
116
+ endpoint: 'http://localhost:8787/api/yara/scan',
117
+ // choose one:
118
+ // mode: 'multipart',
119
+ mode: 'json-base64',
120
+ rulesAsBase64: true, // sends rulesB64 in JSON
121
+ });
122
+
123
+ const compiled = await engine.compile(RULES);
124
+ const bytes = new Uint8Array(await file.arrayBuffer());
125
+ const matches = await compiled.scan(bytes);
126
+
127
+ console.log('REMOTE MATCHES:', matches);
128
+ }
129
+ ```
130
+
131
+ ---
132
+
133
+ ## API
134
+
135
+ ### Node
136
+
137
+ #### `async function* scanDir(root: string, opts?: NodeScanOptions): AsyncGenerator<NodeFileEntry>`
138
+
139
+ Recursively scans `root` and yields entries with optional YARA results.
140
+
141
+ **`NodeScanOptions`**
142
+ ```ts
143
+ type NodeScanOptions = {
144
+ enableYara?: boolean; // default: false
145
+ yaraRules?: string; // inline rules
146
+ yaraRulesPath?: string; // path to .yar file
147
+
148
+ includeExtensions?: string[]; // ['.txt', '.bin']
149
+ maxFileSizeBytes?: number; // skip if size > threshold
150
+
151
+ yaraAsync?: boolean; // prefer scanFileAsync if available
152
+ yaraPreferBuffer?: boolean; // force buffer mode (enables sampling)
153
+ yaraSampleBytes?: number; // if buffer mode: scan first N bytes only
154
+ };
9
155
  ```
10
156
 
11
- ## Usage
157
+ **`NodeFileEntry`**
158
+ ```ts
159
+ type NodeFileEntry = {
160
+ path: string; // relative to root
161
+ absPath: string; // absolute
162
+ isDir: boolean;
163
+ yara?: NodeYaraResult;
164
+ };
165
+ ```
166
+
167
+ **`NodeYaraResult`**
168
+ ```ts
169
+ type NodeYaraVerdict = 'malicious' | 'suspicious' | 'clean';
170
+
171
+ type NodeYaraResult = {
172
+ matches: YaraMatch[];
173
+ status: 'scanned' | 'skipped' | 'error';
174
+ reason?: 'max-size' | 'filtered-ext' | 'not-enabled' | 'engine-missing' | 'error';
175
+ mode?: 'async' | 'file' | 'buffer' | 'buffer-sampled';
176
+ verdict?: NodeYaraVerdict; // when status === 'scanned'
177
+ };
178
+
179
+ type YaraMatch = {
180
+ rule: string;
181
+ tags?: string[];
182
+ };
183
+ ```
184
+
185
+ **Scanning path**
186
+ - If `yaraAsync` is true and engine exposes `scanFileAsync` → use it.
187
+ - Else if engine exposes `scanFile` → use it.
188
+ - Else → fallback to buffer mode (`scan(bytes)`).
189
+ - If `yaraSampleBytes` is set, only the first N bytes are read (sampling).
12
190
 
191
+ ---
192
+
193
+ ### Browser (Remote YARA)
194
+
195
+ #### `createRemoteEngine(options: RemoteEngineOptions)`
196
+
197
+ Creates an engine that **delegates** scanning to your HTTP endpoint.
198
+
199
+ ```ts
200
+ type RemoteEngineOptions = {
201
+ endpoint: string; // e.g. '/api/yara/scan'
202
+ headers?: Record<string, string>; // Authorization, etc.
203
+ rulesField?: string; // default 'rules' (multipart/json)
204
+ fileField?: string; // default 'file' (multipart/json)
205
+ mode?: 'multipart' | 'json-base64';// default 'multipart'
206
+ rulesAsBase64?: boolean; // if mode='json-base64', sends 'rulesB64'
207
+ };
208
+ ```
209
+
210
+ **Protocol**
211
+ - `multipart`: send `rules` (text or file) + `file` (binary).
212
+ - `json-base64`: send `{ rules: string, file: base64 }` or `{ rulesB64: base64, file: base64 }`.
213
+
214
+ **Returned engine**
215
+ - `await engine.compile(rulesSource)` → `compiled`
216
+ - `await compiled.scan(bytes)` → `YaraMatch[]`
217
+
218
+ ---
219
+
220
+ ## Browser (Remote YARA)
221
+
222
+ ### Example Express endpoint
223
+
224
+ ```ts
225
+ import express from 'express';
226
+ import multer from 'multer';
227
+ import cors from 'cors';
228
+ import { createYaraScannerFromRules } from 'pompelmi'; // or from './src/yara/index' in dev
229
+
230
+ const app = express();
231
+ const upload = multer();
232
+
233
+ app.use(cors({ origin: true, methods: ['POST','OPTIONS'], allowedHeaders: ['Content-Type','Authorization'] }));
234
+ app.use(express.json({ limit: '20mb' }));
235
+ app.options('/api/yara/scan', cors());
236
+
237
+ app.post('/api/yara/scan',
238
+ upload.fields([{ name: 'file', maxCount: 1 }, { name: 'rules', maxCount: 1 }]),
239
+ async (req, res) => {
240
+ try {
241
+ let rules = '';
242
+ let bytes: Uint8Array;
243
+
244
+ if (req.is('multipart/form-data')) {
245
+ const files = req.files as Record<string, Array<{ buffer: Buffer }>> | undefined;
246
+ if (files?.rules?.[0]) rules = files.rules[0].buffer.toString('utf8');
247
+ else rules = (req.body?.rules ?? '').toString();
248
+
249
+ const f = files?.file?.[0];
250
+ if (!f) return res.status(400).json({ error: 'file missing' });
251
+ bytes = new Uint8Array(f.buffer);
252
+ } else {
253
+ const rulesB64 = (req.body as any)?.rulesB64;
254
+ if (typeof rulesB64 === 'string') rules = Buffer.from(rulesB64, 'base64').toString('utf8');
255
+ else rules = (req.body?.rules ?? '').toString();
256
+
257
+ const b64 = (req.body as any)?.file;
258
+ if (typeof b64 !== 'string') return res.status(400).json({ error: 'file (base64) missing' });
259
+ bytes = Uint8Array.from(Buffer.from(b64, 'base64'));
260
+ }
261
+
262
+ if (!rules.trim()) return res.status(400).json({ error: 'rules empty' });
263
+
264
+ const compiled = await createYaraScannerFromRules(rules);
265
+ const matches = await compiled.scan(bytes);
266
+ res.json(matches);
267
+ } catch (err: any) {
268
+ console.error('[remote-yara] error', err);
269
+ res.status(500).json({ error: 'internal_error', detail: String(err?.message ?? err) });
270
+ }
271
+ }
272
+ );
273
+
274
+ app.listen(8787, () => {
275
+ console.log('[remote-yara] listening on http://localhost:8787');
276
+ });
277
+ ```
278
+
279
+ ---
280
+
281
+ ## Examples
282
+
283
+ - **Node integration smoke**
284
+ `npm run yara:int:smoke` – creates a temporary directory with sample files and runs several scenarios (rules from path/string, includeExtensions, maxFileSizeBytes, sampling miss/hit, async/file/buffer paths) with **assertions**.
285
+
286
+ - **Remote server (dev)**
287
+ `npm run dev:remote` – starts the Express endpoint shown above.
288
+
289
+ - **cURL examples**
13
290
  ```bash
14
- pompelmi --help
291
+ # multipart, rules as text
292
+ curl -sS -F file=@tmp-yara-int/sample.txt \
293
+ --form-string "rules=$(cat rules/demo.yar)" \
294
+ http://localhost:8787/api/yara/scan
295
+
296
+ # multipart, rules as file
297
+ curl -sS -F rules=@rules/demo.yar -F file=@tmp-yara-int/sample.txt \
298
+ http://localhost:8787/api/yara/scan
299
+
300
+ # JSON base64
301
+ FILE_B64=$(base64 -i tmp-yara-int/sample.txt | tr -d '\n')
302
+ RULES_B64=$(base64 -i rules/demo.yar | tr -d '\n')
303
+ curl -sS -H "Content-Type: application/json" \
304
+ --data "{\"rulesB64\": \"${RULES_B64}\", \"file\": \"${FILE_B64}\"}" \
305
+ http://localhost:8787/api/yara/scan
15
306
  ```
16
307
 
17
- (Work in progress)
308
+ ---
309
+
310
+ ## FAQ
311
+
312
+ **Does this detect all malware?**
313
+ No. It matches **YARA rules** you provide. That means detection quality depends on your rule set. No cloud reputation, sandboxing, or emulation is included.
314
+
315
+ **Browser scanning without WASM?**
316
+ Yes, via the **Remote Engine**: the browser posts bytes + rules to your server, your server runs YARA, and returns matches.
317
+
318
+ **Can I scan only a sample of each file?**
319
+ Yes. Set `yaraSampleBytes` to scan only the first N bytes of each file. Sampling applies in Node buffer mode, which you can force with `yaraPreferBuffer: true`.
320
+
321
+ **What about large directories?**
322
+ You can filter by extension and cap the file size (`maxFileSizeBytes`). Concurrency controls may be added in a future release.
323
+
324
+ ---
325
+
326
+ ## Security & Disclaimer
327
+
328
+ - This library **reads** files; it does not execute them.
329
+ - YARA detections depend entirely on the rules you supply. Expect **false positives** and **false negatives**.
330
+ - Always run scanning in a controlled environment with appropriate security controls.
331
+
332
+ ---
333
+
334
+ ## Contributing
335
+
336
+ PRs and issues are welcome!
337
+ Before submitting, please:
338
+
339
+ - Run the build and tests:
340
+ ```bash
341
+ npm run build
342
+ npm run yara:int:smoke
343
+ ```
344
+ - Keep commits focused and well described.
345
+ - For new features, consider adding/adjusting integration tests.
346
+
347
+ ---
348
+
349
+ ## Versioning
350
+
351
+ Current channel: **`0.2.0-alpha.x`**
352
+ This is a pre-release channel. Expect minor API changes before a stable `0.2.0`.
353
+
354
+ Publish suggestion:
355
+ ```bash
356
+ npm version 0.2.0-alpha.0
357
+ npm publish --tag next
358
+ ```
359
+
360
+ ---
361
+
362
+ ## License
18
363
 
19
- `LICENSE`.
364
+ [MIT](./LICENSE) © 2025-present pompelmi contributors
@@ -0,0 +1,86 @@
1
+ 'use strict';
2
+
3
+ var react = require('react');
4
+
5
+ /**
6
+ * Reads an array of File objects via FileReader and returns their text.
7
+ */
8
+ async function scanFiles(files) {
9
+ const readText = file => new Promise((resolve, reject) => {
10
+ const reader = new FileReader();
11
+ reader.onload = () => resolve(reader.result);
12
+ reader.onerror = () => reject(reader.error);
13
+ reader.readAsText(file);
14
+ });
15
+ const results = [];
16
+ for (const file of files) {
17
+ const content = await readText(file);
18
+ results.push({
19
+ file,
20
+ content
21
+ });
22
+ }
23
+ return results;
24
+ }
25
+
26
+ /**
27
+ * Validates a File by MIME type and size (max 5 MB).
28
+ */
29
+ function validateFile(file) {
30
+ const maxSize = 5 * 1024 * 1024;
31
+ const allowedTypes = ['text/plain', 'application/json', 'text/csv'];
32
+ if (!allowedTypes.includes(file.type)) {
33
+ return {
34
+ valid: false,
35
+ error: 'Unsupported file type'
36
+ };
37
+ }
38
+ if (file.size > maxSize) {
39
+ return {
40
+ valid: false,
41
+ error: 'File too large (max 5 MB)'
42
+ };
43
+ }
44
+ return {
45
+ valid: true
46
+ };
47
+ }
48
+
49
/**
 * React hook backing an `<input type="file" onChange>` handler: validates the
 * selected files, reads the accepted ones as text and exposes the outcome.
 *
 * Files rejected by `validateFile` and files whose read fails are both
 * reported through `errors`. Read failures are caught per file, so they never
 * surface as an unhandled promise rejection from the event handler and the
 * files that could be read are still delivered in `results`.
 *
 * @returns {{ results: Array, errors: Array, onChange: Function }}
 *   `results` holds the `{ file, content }` entries produced by `scanFiles`,
 *   `errors` holds `{ file, error }` entries, and `onChange` is the handler to
 *   attach to the file input.
 */
function useFileScanner() {
  const [results, setResults] = react.useState([]);
  const [errors, setErrors] = react.useState([]);
  const onChange = react.useCallback(async event => {
    const accepted = [];
    const rejected = [];
    for (const file of Array.from(event.target.files || [])) {
      const outcome = validateFile(file);
      if (outcome.valid) {
        accepted.push(file);
      } else {
        rejected.push({
          file,
          error: outcome.error
        });
      }
    }
    // Publish validation failures right away, before any file is read.
    setErrors(rejected);
    const scanned = [];
    const unreadable = [];
    for (const file of accepted) {
      try {
        // Scan one file per call so a failure can be attributed to its file.
        scanned.push(...(await scanFiles([file])));
      } catch (err) {
        unreadable.push({
          file,
          error: err instanceof Error && err.message ? err.message : 'Failed to read file'
        });
      }
    }
    setResults(scanned);
    if (unreadable.length) {
      setErrors([...rejected, ...unreadable]);
    }
  }, []);
  return {
    results,
    errors,
    onChange
  };
}
83
+
84
+ exports.scanFiles = scanFiles;
85
+ exports.useFileScanner = useFileScanner;
86
+ exports.validateFile = validateFile;
@@ -0,0 +1,82 @@
1
+ import { useState, useCallback } from 'react';
2
+
3
+ /**
4
+ * Reads an array of File objects via FileReader and returns their text.
5
+ */
6
+ async function scanFiles(files) {
7
+ const readText = file => new Promise((resolve, reject) => {
8
+ const reader = new FileReader();
9
+ reader.onload = () => resolve(reader.result);
10
+ reader.onerror = () => reject(reader.error);
11
+ reader.readAsText(file);
12
+ });
13
+ const results = [];
14
+ for (const file of files) {
15
+ const content = await readText(file);
16
+ results.push({
17
+ file,
18
+ content
19
+ });
20
+ }
21
+ return results;
22
+ }
23
+
24
+ /**
25
+ * Validates a File by MIME type and size (max 5 MB).
26
+ */
27
+ function validateFile(file) {
28
+ const maxSize = 5 * 1024 * 1024;
29
+ const allowedTypes = ['text/plain', 'application/json', 'text/csv'];
30
+ if (!allowedTypes.includes(file.type)) {
31
+ return {
32
+ valid: false,
33
+ error: 'Unsupported file type'
34
+ };
35
+ }
36
+ if (file.size > maxSize) {
37
+ return {
38
+ valid: false,
39
+ error: 'File too large (max 5 MB)'
40
+ };
41
+ }
42
+ return {
43
+ valid: true
44
+ };
45
+ }
46
+
47
/**
 * React hook backing an `<input type="file" onChange>` handler: validates the
 * selected files, reads the accepted ones as text and exposes the outcome.
 *
 * Files rejected by `validateFile` and files whose read fails are both
 * reported through `errors`. Read failures are caught per file, so they never
 * surface as an unhandled promise rejection from the event handler and the
 * files that could be read are still delivered in `results`.
 *
 * @returns {{ results: Array, errors: Array, onChange: Function }}
 *   `results` holds the `{ file, content }` entries produced by `scanFiles`,
 *   `errors` holds `{ file, error }` entries, and `onChange` is the handler to
 *   attach to the file input.
 */
function useFileScanner() {
  const [results, setResults] = useState([]);
  const [errors, setErrors] = useState([]);
  const onChange = useCallback(async event => {
    const accepted = [];
    const rejected = [];
    for (const file of Array.from(event.target.files || [])) {
      const outcome = validateFile(file);
      if (outcome.valid) {
        accepted.push(file);
      } else {
        rejected.push({
          file,
          error: outcome.error
        });
      }
    }
    // Publish validation failures right away, before any file is read.
    setErrors(rejected);
    const scanned = [];
    const unreadable = [];
    for (const file of accepted) {
      try {
        // Scan one file per call so a failure can be attributed to its file.
        scanned.push(...(await scanFiles([file])));
      } catch (err) {
        unreadable.push({
          file,
          error: err instanceof Error && err.message ? err.message : 'Failed to read file'
        });
      }
    }
    setResults(scanned);
    if (unreadable.length) {
      setErrors([...rejected, ...unreadable]);
    }
  }, []);
  return {
    results,
    errors,
    onChange
  };
}
81
+
82
+ export { scanFiles, useFileScanner, validateFile };