openclaw-plugin-vt-sentinel 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.d.ts +23 -0
- package/dist/cache.js +61 -0
- package/dist/classifier.d.ts +56 -0
- package/dist/classifier.js +410 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.js +832 -0
- package/dist/path-extractor.d.ts +76 -0
- package/dist/path-extractor.js +761 -0
- package/dist/scanner.d.ts +96 -0
- package/dist/scanner.js +312 -0
- package/dist/vt-api.d.ts +69 -0
- package/dist/vt-api.js +231 -0
- package/hooks/vt-auto-scan/HOOK.md +25 -0
- package/hooks/vt-auto-scan/handler.js +106 -0
- package/openclaw.plugin.json +43 -0
- package/package.json +44 -0
- package/skills/vt-sentinel/SKILL.md +129 -0
package/dist/cache.d.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-memory cache with TTL and rate limiter for VT API.
|
|
3
|
+
* Free tier: 4 requests/minute, 500 requests/day.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Keyed in-memory store whose entries expire a fixed time after they are written.
 *
 * @typeParam T - Type of the cached values.
 */
export declare class Cache<T> {
    /** Backing container holding each value together with its expiry time. */
    private store;
    /** Lifetime applied to every entry, in milliseconds. */
    private ttlMs;
    /**
     * @param ttlMinutes - Lifetime of each entry in minutes; the implementation defaults to 15.
     */
    constructor(ttlMinutes?: number);
    /**
     * Look up a value.
     *
     * @returns The cached value, or `null` when the key is missing or its entry has expired.
     */
    get(key: string): T | null;
    /** Store `value` under `key`, restarting the entry's lifetime. */
    set(key: string, value: T): void;
    /** Report whether `get(key)` would currently yield a value. */
    has(key: string): boolean;
    /** Drop every entry. */
    clear(): void;
}
|
|
14
|
+
/**
 * Sliding-window request limiter: allows a bounded number of acquisitions
 * per one-minute window.
 */
export declare class RateLimiter {
    /** Times (ms since epoch) of the acquisitions still inside the window. */
    private timestamps;
    /** Maximum number of acquisitions allowed per window. */
    private maxRequests;
    /** Window length in milliseconds. */
    private windowMs;
    /**
     * @param maxPerMinute - Allowed acquisitions per minute; the implementation defaults to 4.
     */
    constructor(maxPerMinute?: number);
    /**
     * Resolve once a request slot is free; the slot is consumed before the
     * promise resolves.
     */
    acquire(): Promise<void>;
}
|
package/dist/cache.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* In-memory cache with TTL and rate limiter for VT API.
|
|
4
|
+
* Free tier: 4 requests/minute, 500 requests/day.
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.RateLimiter = exports.Cache = void 0;
|
|
8
|
+
class Cache {
|
|
9
|
+
constructor(ttlMinutes = 15) {
|
|
10
|
+
this.store = new Map();
|
|
11
|
+
this.ttlMs = ttlMinutes * 60 * 1000;
|
|
12
|
+
}
|
|
13
|
+
get(key) {
|
|
14
|
+
const entry = this.store.get(key);
|
|
15
|
+
if (!entry)
|
|
16
|
+
return null;
|
|
17
|
+
if (Date.now() > entry.expiresAt) {
|
|
18
|
+
this.store.delete(key);
|
|
19
|
+
return null;
|
|
20
|
+
}
|
|
21
|
+
return entry.value;
|
|
22
|
+
}
|
|
23
|
+
set(key, value) {
|
|
24
|
+
this.store.set(key, {
|
|
25
|
+
value,
|
|
26
|
+
expiresAt: Date.now() + this.ttlMs,
|
|
27
|
+
});
|
|
28
|
+
}
|
|
29
|
+
has(key) {
|
|
30
|
+
return this.get(key) !== null;
|
|
31
|
+
}
|
|
32
|
+
clear() {
|
|
33
|
+
this.store.clear();
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
exports.Cache = Cache;
|
|
37
|
+
class RateLimiter {
|
|
38
|
+
constructor(maxPerMinute = 4) {
|
|
39
|
+
this.timestamps = [];
|
|
40
|
+
this.maxRequests = maxPerMinute;
|
|
41
|
+
this.windowMs = 60 * 1000;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Wait until a request slot is available, then consume it.
|
|
45
|
+
*/
|
|
46
|
+
async acquire() {
|
|
47
|
+
while (true) {
|
|
48
|
+
const now = Date.now();
|
|
49
|
+
// Purge timestamps outside the window
|
|
50
|
+
this.timestamps = this.timestamps.filter((t) => now - t < this.windowMs);
|
|
51
|
+
if (this.timestamps.length < this.maxRequests) {
|
|
52
|
+
this.timestamps.push(now);
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
55
|
+
// Wait until the oldest request exits the window
|
|
56
|
+
const waitMs = this.timestamps[0] + this.windowMs - now + 50;
|
|
57
|
+
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
exports.RateLimiter = RateLimiter;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/** Security category assigned to a file by `FileClassifier.classify`. */
export declare enum FileCategory {
    /** Executable binaries, installers, shortcuts and scripts. */
    HIGH_RISK = "HIGH_RISK",
    /** OpenClaw semantic files (SKILL.md, HOOK.md, ...) and ZIP packages containing them. */
    SEMANTIC_RISK = "SEMANTIC_RISK",
    /** Documents and archives that may hold private data. */
    SENSITIVE = "SENSITIVE",
    /** Image, audio and video formats. */
    MEDIA = "MEDIA",
    /** Anything not recognised by the classifier. */
    SAFE = "SAFE"
}
|
|
8
|
+
/**
|
|
9
|
+
* Content-first file classifier.
|
|
10
|
+
*
|
|
11
|
+
* Classification priority:
|
|
12
|
+
* 1. Magic bytes (binary formats — definitive)
|
|
13
|
+
* 2. Shebang #! (executable scripts — definitive)
|
|
14
|
+
* 3. OpenClaw semantic filenames (SKILL.md, HOOK.md, etc.)
|
|
15
|
+
* 4. Text content analysis (script patterns in plain text)
|
|
16
|
+
* 5. Default → SAFE
|
|
17
|
+
*
|
|
18
|
+
* Extensions are NEVER used as the sole classification signal.
|
|
19
|
+
* This prevents both false positives (private PDF renamed .sh → would be uploaded)
|
|
20
|
+
* and false negatives (PE binary renamed .txt → would be skipped).
|
|
21
|
+
*/
|
|
22
|
+
export declare class FileClassifier {
    /** Magic-byte signatures keyed by format name. */
    private static readonly MAGIC;
    /** Lower-cased basenames of OpenClaw files that need semantic analysis. */
    private static readonly SEMANTIC_FILENAMES;
    /** Regular expressions that indicate script source in plain text. */
    private static readonly SCRIPT_PATTERNS;
    /** Number of distinct script patterns that must match to flag a text file. */
    private static readonly SCRIPT_PATTERN_THRESHOLD;
    /** ZIP entry basenames that mark an OpenClaw skill/hook package. */
    private static readonly ZIP_SKILL_MARKERS;
    /** ZIP entry suffixes that mark an archive carrying executables. */
    private static readonly ZIP_EXEC_EXTENSIONS;
    /** ZIP entry names/prefixes that mark an Office OOXML document. */
    private static readonly ZIP_OOXML_MARKERS;
    /** Read the leading bytes of a file; yields `null` when the file cannot be read. */
    private static readHeader;
    /** Test whether a buffer starts with a given byte signature. */
    private static matchMagic;
    /** Test whether a buffer starts with `#!`. */
    private static isShebang;
    /** Test whether bytes 4-7 of a buffer spell `ftyp`. */
    private static isFtyp;
    /**
     * Heuristic text check: the buffer contains no null bytes and no control
     * characters other than \t, \n and \r.
     */
    private static isLikelyText;
    /**
     * Peek inside a ZIP file by walking its local file headers
     * (signature PK\x03\x04, file name at offset 30) without decompressing.
     * Only the first 64 KiB of the file and at most 100 entries are examined.
     *
     * Returns: 'skill' | 'executable' | 'office' | 'archive'
     */
    private static inspectZip;
    /**
     * Read the first 4 KiB of a file and test it against the script patterns.
     */
    private static detectScriptContent;
    /**
     * Classify a file into a security category using magic bytes and content
     * analysis; the extension alone never decides the result.
     */
    static classify(filePath: string): FileCategory;
}
|
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.FileClassifier = exports.FileCategory = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
// String enum: every member's value equals its own name.
var FileCategory;
(function (category) {
    for (const name of ["HIGH_RISK", "SEMANTIC_RISK", "SENSITIVE", "MEDIA", "SAFE"]) {
        category[name] = name;
    }
})(FileCategory || (exports.FileCategory = FileCategory = {}));
|
|
47
|
+
/**
|
|
48
|
+
* Content-first file classifier.
|
|
49
|
+
*
|
|
50
|
+
* Classification priority:
|
|
51
|
+
* 1. Magic bytes (binary formats — definitive)
|
|
52
|
+
* 2. Shebang #! (executable scripts — definitive)
|
|
53
|
+
* 3. OpenClaw semantic filenames (SKILL.md, HOOK.md, etc.)
|
|
54
|
+
* 4. Text content analysis (script patterns in plain text)
|
|
55
|
+
* 5. Default → SAFE
|
|
56
|
+
*
|
|
57
|
+
* Extensions are NEVER used as the sole classification signal.
|
|
58
|
+
* This prevents both false positives (private PDF renamed .sh → would be uploaded)
|
|
59
|
+
* and false negatives (PE binary renamed .txt → would be skipped).
|
|
60
|
+
*/
|
|
61
|
+
class FileClassifier {
|
|
62
|
+
// ── Low-level helpers ──────────────────────────────────────────────
|
|
63
|
+
static readHeader(filePath, length) {
|
|
64
|
+
try {
|
|
65
|
+
const fd = fs.openSync(filePath, 'r');
|
|
66
|
+
const buf = Buffer.alloc(length);
|
|
67
|
+
const bytesRead = fs.readSync(fd, buf, 0, length, 0);
|
|
68
|
+
fs.closeSync(fd);
|
|
69
|
+
return bytesRead < length ? buf.subarray(0, bytesRead) : buf;
|
|
70
|
+
}
|
|
71
|
+
catch {
|
|
72
|
+
return null;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
static matchMagic(buf, magic) {
|
|
76
|
+
if (buf.length < magic.length)
|
|
77
|
+
return false;
|
|
78
|
+
for (let i = 0; i < magic.length; i++) {
|
|
79
|
+
if (buf[i] !== magic[i])
|
|
80
|
+
return false;
|
|
81
|
+
}
|
|
82
|
+
return true;
|
|
83
|
+
}
|
|
84
|
+
static isShebang(buf) {
|
|
85
|
+
return buf.length >= 2 && buf[0] === 0x23 && buf[1] === 0x21;
|
|
86
|
+
}
|
|
87
|
+
static isFtyp(buf) {
|
|
88
|
+
return buf.length >= 8 &&
|
|
89
|
+
buf[4] === 0x66 && buf[5] === 0x74 &&
|
|
90
|
+
buf[6] === 0x79 && buf[7] === 0x70;
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Check if a buffer looks like valid UTF-8 text (no null bytes,
|
|
94
|
+
* no control chars other than \t \n \r in the first chunk).
|
|
95
|
+
*/
|
|
96
|
+
static isLikelyText(buf) {
|
|
97
|
+
for (let i = 0; i < buf.length; i++) {
|
|
98
|
+
const b = buf[i];
|
|
99
|
+
if (b === 0x00)
|
|
100
|
+
return false; // null byte → binary
|
|
101
|
+
if (b < 0x20 && b !== 0x09 && b !== 0x0A && b !== 0x0D)
|
|
102
|
+
return false;
|
|
103
|
+
}
|
|
104
|
+
return true;
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* Peek inside a ZIP file by reading its local file headers.
|
|
108
|
+
* ZIP local file header: PK\x03\x04 ... filename at offset 30.
|
|
109
|
+
* We read enough to scan the first ~50 entries without full decompression.
|
|
110
|
+
*
|
|
111
|
+
* Returns: 'skill' | 'executable' | 'office' | 'archive'
|
|
112
|
+
*/
|
|
113
|
+
static inspectZip(filePath) {
|
|
114
|
+
// Read up to 64KB to scan ZIP directory entries
|
|
115
|
+
const buf = this.readHeader(filePath, 65536);
|
|
116
|
+
if (!buf || buf.length < 30)
|
|
117
|
+
return 'archive';
|
|
118
|
+
const entryNames = [];
|
|
119
|
+
let offset = 0;
|
|
120
|
+
// Walk through ZIP local file headers (PK\x03\x04)
|
|
121
|
+
while (offset + 30 <= buf.length) {
|
|
122
|
+
// Check local file header signature
|
|
123
|
+
if (buf[offset] !== 0x50 || buf[offset + 1] !== 0x4B ||
|
|
124
|
+
buf[offset + 2] !== 0x03 || buf[offset + 3] !== 0x04) {
|
|
125
|
+
break;
|
|
126
|
+
}
|
|
127
|
+
const flags = buf.readUInt16LE(offset + 6);
|
|
128
|
+
const compressedSize = buf.readUInt32LE(offset + 18);
|
|
129
|
+
const nameLen = buf.readUInt16LE(offset + 26);
|
|
130
|
+
const extraLen = buf.readUInt16LE(offset + 28);
|
|
131
|
+
const hasDataDescriptor = (flags & 0x0008) !== 0;
|
|
132
|
+
if (offset + 30 + nameLen > buf.length)
|
|
133
|
+
break;
|
|
134
|
+
const name = buf.subarray(offset + 30, offset + 30 + nameLen).toString('utf-8').toLowerCase();
|
|
135
|
+
entryNames.push(name);
|
|
136
|
+
if (compressedSize === 0 && hasDataDescriptor) {
|
|
137
|
+
// Data descriptor: actual size follows the compressed data, not in the header.
|
|
138
|
+
// Scan forward for the next local file header (PK\x03\x04) or central dir (PK\x01\x02).
|
|
139
|
+
let nextOffset = offset + 30 + nameLen + extraLen;
|
|
140
|
+
let found = false;
|
|
141
|
+
while (nextOffset + 4 <= buf.length) {
|
|
142
|
+
if (buf[nextOffset] === 0x50 && buf[nextOffset + 1] === 0x4B &&
|
|
143
|
+
((buf[nextOffset + 2] === 0x03 && buf[nextOffset + 3] === 0x04) ||
|
|
144
|
+
(buf[nextOffset + 2] === 0x01 && buf[nextOffset + 3] === 0x02))) {
|
|
145
|
+
offset = nextOffset;
|
|
146
|
+
found = true;
|
|
147
|
+
break;
|
|
148
|
+
}
|
|
149
|
+
nextOffset++;
|
|
150
|
+
}
|
|
151
|
+
if (!found)
|
|
152
|
+
break;
|
|
153
|
+
}
|
|
154
|
+
else {
|
|
155
|
+
// Normal entry: advance past header + name + extra + compressed data
|
|
156
|
+
offset += 30 + nameLen + extraLen + compressedSize;
|
|
157
|
+
}
|
|
158
|
+
// Safety: stop after 100 entries
|
|
159
|
+
if (entryNames.length >= 100)
|
|
160
|
+
break;
|
|
161
|
+
}
|
|
162
|
+
if (entryNames.length === 0)
|
|
163
|
+
return 'archive';
|
|
164
|
+
// Check for skill/hook package markers
|
|
165
|
+
for (const entry of entryNames) {
|
|
166
|
+
const basename = entry.split('/').pop() || '';
|
|
167
|
+
if (this.ZIP_SKILL_MARKERS.includes(basename)) {
|
|
168
|
+
return 'skill';
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
// Check for executables inside
|
|
172
|
+
for (const entry of entryNames) {
|
|
173
|
+
for (const ext of this.ZIP_EXEC_EXTENSIONS) {
|
|
174
|
+
if (entry.endsWith(ext))
|
|
175
|
+
return 'executable';
|
|
176
|
+
}
|
|
177
|
+
// macOS .app bundle: Contents/MacOS/ contains the actual binary
|
|
178
|
+
if (/\.app\/contents\/macos\//i.test(entry))
|
|
179
|
+
return 'executable';
|
|
180
|
+
}
|
|
181
|
+
// Check for OOXML (Office document)
|
|
182
|
+
for (const entry of entryNames) {
|
|
183
|
+
for (const marker of this.ZIP_OOXML_MARKERS) {
|
|
184
|
+
if (entry === marker || entry.startsWith(marker))
|
|
185
|
+
return 'office';
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
return 'archive';
|
|
189
|
+
}
|
|
190
|
+
/**
|
|
191
|
+
* Read a larger chunk and test for script-like content patterns.
|
|
192
|
+
*/
|
|
193
|
+
static detectScriptContent(filePath) {
|
|
194
|
+
const chunk = this.readHeader(filePath, 4096);
|
|
195
|
+
if (!chunk || chunk.length === 0)
|
|
196
|
+
return false;
|
|
197
|
+
if (!this.isLikelyText(chunk))
|
|
198
|
+
return false;
|
|
199
|
+
const text = chunk.toString('utf-8');
|
|
200
|
+
let matches = 0;
|
|
201
|
+
for (const pat of this.SCRIPT_PATTERNS) {
|
|
202
|
+
if (pat.test(text)) {
|
|
203
|
+
matches++;
|
|
204
|
+
if (matches >= this.SCRIPT_PATTERN_THRESHOLD)
|
|
205
|
+
return true;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
return false;
|
|
209
|
+
}
|
|
210
|
+
// ── Main classifier ────────────────────────────────────────────────
|
|
211
|
+
/**
|
|
212
|
+
* Classify a file into a security category.
|
|
213
|
+
* Uses ONLY magic bytes and content analysis — never extensions alone.
|
|
214
|
+
*/
|
|
215
|
+
static classify(filePath) {
|
|
216
|
+
const header = this.readHeader(filePath, 20);
|
|
217
|
+
if (!header || header.length === 0)
|
|
218
|
+
return FileCategory.SAFE;
|
|
219
|
+
// ── Phase 1: Magic bytes (definitive binary identification) ────
|
|
220
|
+
// 1a. Executable binaries → HIGH_RISK (auto-upload OK, no privacy concern)
|
|
221
|
+
if (this.matchMagic(header, this.MAGIC.PE) ||
|
|
222
|
+
this.matchMagic(header, this.MAGIC.ELF) ||
|
|
223
|
+
this.matchMagic(header, this.MAGIC.MACH_O_32) ||
|
|
224
|
+
this.matchMagic(header, this.MAGIC.MACH_O_64) ||
|
|
225
|
+
this.matchMagic(header, this.MAGIC.MACH_O_REV) ||
|
|
226
|
+
this.matchMagic(header, this.MAGIC.MACH_O_64_REV)) {
|
|
227
|
+
return FileCategory.HIGH_RISK;
|
|
228
|
+
}
|
|
229
|
+
// 1a-bis. 0xCAFEBABE / 0xBEBAFECA: Mach-O fat/universal OR Java .class file.
|
|
230
|
+
// Both are executable code → HIGH_RISK regardless.
|
|
231
|
+
if (this.matchMagic(header, this.MAGIC.MACH_O_FAT) ||
|
|
232
|
+
this.matchMagic(header, this.MAGIC.MACH_O_FAT_REV)) {
|
|
233
|
+
return FileCategory.HIGH_RISK;
|
|
234
|
+
}
|
|
235
|
+
// 1a-ter. macOS PKG installer (XAR archive) → HIGH_RISK
|
|
236
|
+
if (this.matchMagic(header, this.MAGIC.XAR)) {
|
|
237
|
+
return FileCategory.HIGH_RISK;
|
|
238
|
+
}
|
|
239
|
+
// 1b. Documents → SENSITIVE (may contain private data, hash-only)
|
|
240
|
+
if (this.matchMagic(header, this.MAGIC.PDF) ||
|
|
241
|
+
this.matchMagic(header, this.MAGIC.OLE)) {
|
|
242
|
+
return FileCategory.SENSITIVE;
|
|
243
|
+
}
|
|
244
|
+
// 1c. ZIP containers — inspect contents to decide
|
|
245
|
+
if (this.matchMagic(header, this.MAGIC.ZIP)) {
|
|
246
|
+
const zipType = this.inspectZip(filePath);
|
|
247
|
+
switch (zipType) {
|
|
248
|
+
case 'skill': return FileCategory.SEMANTIC_RISK; // Skill/hook package → auto-upload + Code Insight
|
|
249
|
+
case 'executable': return FileCategory.HIGH_RISK; // Contains binaries → auto-upload
|
|
250
|
+
case 'office': return FileCategory.SENSITIVE; // OOXML doc → user consent required
|
|
251
|
+
default: return FileCategory.SENSITIVE; // Unknown archive → user consent required
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
// 1d. Compressed archives → SENSITIVE (can't inspect contents, may be private)
|
|
255
|
+
if (this.matchMagic(header, this.MAGIC.GZIP) ||
|
|
256
|
+
this.matchMagic(header, this.MAGIC.SEVENZ) ||
|
|
257
|
+
this.matchMagic(header, this.MAGIC.RAR) ||
|
|
258
|
+
this.matchMagic(header, this.MAGIC.XZ) ||
|
|
259
|
+
this.matchMagic(header, this.MAGIC.BZ2) ||
|
|
260
|
+
this.matchMagic(header, this.MAGIC.DEB) ||
|
|
261
|
+
this.matchMagic(header, this.MAGIC.RPM)) {
|
|
262
|
+
return FileCategory.SENSITIVE;
|
|
263
|
+
}
|
|
264
|
+
// 1e. Windows executable containers → HIGH_RISK
|
|
265
|
+
if (this.matchMagic(header, this.MAGIC.CHM) ||
|
|
266
|
+
this.matchMagic(header, this.MAGIC.CAB)) {
|
|
267
|
+
return FileCategory.HIGH_RISK;
|
|
268
|
+
}
|
|
269
|
+
// 1e-bis. Windows Shell Link (.lnk) — top malware delivery vector.
|
|
270
|
+
// LNK files can embed arbitrary commands, PowerShell, download payloads.
|
|
271
|
+
if (this.matchMagic(header, this.MAGIC.LNK)) {
|
|
272
|
+
return FileCategory.HIGH_RISK;
|
|
273
|
+
}
|
|
274
|
+
// 1f. Media → MEDIA (skip scanning)
|
|
275
|
+
if (this.matchMagic(header, this.MAGIC.PNG) ||
|
|
276
|
+
this.matchMagic(header, this.MAGIC.JPG) ||
|
|
277
|
+
this.matchMagic(header, this.MAGIC.GIF) ||
|
|
278
|
+
this.matchMagic(header, this.MAGIC.RIFF) ||
|
|
279
|
+
this.matchMagic(header, this.MAGIC.MKV) ||
|
|
280
|
+
this.matchMagic(header, this.MAGIC.FLAC) ||
|
|
281
|
+
this.matchMagic(header, this.MAGIC.OGG) ||
|
|
282
|
+
this.matchMagic(header, this.MAGIC.BMP) ||
|
|
283
|
+
this.isFtyp(header)) {
|
|
284
|
+
return FileCategory.MEDIA;
|
|
285
|
+
}
|
|
286
|
+
// ── Phase 2: Shebang (definitive script identification) ────────
|
|
287
|
+
if (this.isShebang(header)) {
|
|
288
|
+
return FileCategory.HIGH_RISK;
|
|
289
|
+
}
|
|
290
|
+
// ── Phase 2b: Windows Registry script (definitive text header) ──
|
|
291
|
+
// .reg files can modify auto-run keys, disable security features,
|
|
292
|
+
// hijack file associations. Definitive prefix — not heuristic.
|
|
293
|
+
// Need 40+ bytes to match "Windows Registry Editor Version 5.00".
|
|
294
|
+
if (this.isLikelyText(header)) {
|
|
295
|
+
const textChunk = this.readHeader(filePath, 48);
|
|
296
|
+
if (textChunk && textChunk.length >= 8) {
|
|
297
|
+
const headerText = textChunk.toString('utf-8');
|
|
298
|
+
if (headerText.startsWith('Windows Registry Editor') || headerText.startsWith('REGEDIT4')) {
|
|
299
|
+
return FileCategory.HIGH_RISK;
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
// ── Phase 3: OpenClaw semantic files (by canonical filename) ───
|
|
304
|
+
const basename = path.basename(filePath).toLowerCase();
|
|
305
|
+
if (this.SEMANTIC_FILENAMES.has(basename)) {
|
|
306
|
+
return FileCategory.SEMANTIC_RISK;
|
|
307
|
+
}
|
|
308
|
+
// ── Phase 4: Content-based script detection ────────────────────
|
|
309
|
+
// Read a larger chunk, verify it's text, match script patterns.
|
|
310
|
+
// Requires ≥2 pattern matches to reduce false positives.
|
|
311
|
+
if (this.detectScriptContent(filePath)) {
|
|
312
|
+
return FileCategory.HIGH_RISK;
|
|
313
|
+
}
|
|
314
|
+
// ── Phase 5: Default → SAFE ────────────────────────────────────
|
|
315
|
+
// Unknown format. Don't upload, don't scan. Conservative default.
|
|
316
|
+
return FileCategory.SAFE;
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
exports.FileClassifier = FileClassifier;
|
|
320
|
+
// ── Magic byte signatures ──────────────────────────────────────────
// Each entry is the exact byte prefix a file must start with.
FileClassifier.MAGIC = {
    // Executables
    PE: [0x4D, 0x5A], // "MZ" (Windows PE)
    ELF: [0x7F, 0x45, 0x4C, 0x46], // "\x7FELF" (Linux)
    MACH_O_32: [0xFE, 0xED, 0xFA, 0xCE], // Mach-O, 32-bit
    MACH_O_64: [0xFE, 0xED, 0xFA, 0xCF], // Mach-O, 64-bit
    MACH_O_REV: [0xCE, 0xFA, 0xED, 0xFE], // MH_CIGAM: Mach-O 32-bit, byte-swapped
    MACH_O_64_REV: [0xCF, 0xFA, 0xED, 0xFE], // MH_CIGAM_64: Mach-O 64-bit, byte-swapped (most common on Intel Mac)
    MACH_O_FAT: [0xCA, 0xFE, 0xBA, 0xBE], // FAT_MAGIC: Mach-O fat/universal
    MACH_O_FAT_REV: [0xBE, 0xBA, 0xFE, 0xCA], // FAT_CIGAM: Mach-O fat, byte-swapped

    // Documents (potentially private)
    PDF: [0x25, 0x50, 0x44, 0x46], // "%PDF"
    OLE: [0xD0, 0xCF, 0x11, 0xE0], // OLE2 (legacy Office)
    ZIP: [0x50, 0x4B, 0x03, 0x04], // "PK\x03\x04" (zip, docx, xlsx, jar…)
    OOXML: [0x50, 0x4B, 0x03, 0x04], // Modern Office: identical to ZIP

    // Compressed archives (contents are not inspected; treated as SENSITIVE)
    GZIP: [0x1F, 0x8B], // gzip (.gz, .tar.gz, .tgz)
    SEVENZ: [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C], // 7-Zip
    RAR: [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07], // "Rar!.."
    XZ: [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00], // .xz
    BZ2: [0x42, 0x5A, 0x68], // "BZh" (bzip2)

    // Linux package formats
    DEB: [0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E, 0x0A], // "!<arch>\n": Debian .deb (ar archive)
    RPM: [0xED, 0xAB, 0xEE, 0xDB], // RPM package

    // macOS distribution formats
    XAR: [0x78, 0x61, 0x72, 0x21], // "xar!": macOS PKG installer

    // Windows containers (executable payloads)
    LNK: [
        0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00,
        0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x46,
    ], // Shell Link (.lnk): 20-byte header/CLSID prefix
    CHM: [0x49, 0x54, 0x53, 0x46], // "ITSF": Compiled HTML Help
    CAB: [0x4D, 0x53, 0x43, 0x46], // "MSCF": Cabinet archive

    // Media
    PNG: [0x89, 0x50, 0x4E, 0x47],
    JPG: [0xFF, 0xD8, 0xFF],
    GIF: [0x47, 0x49, 0x46, 0x38],
    RIFF: [0x52, 0x49, 0x46, 0x46], // "RIFF" (WAV, WEBP, AVI)
    MKV: [0x1A, 0x45, 0xDF, 0xA3], // Matroska/WebM
    FLAC: [0x66, 0x4C, 0x61, 0x43], // "fLaC"
    OGG: [0x4F, 0x67, 0x67, 0x53], // "OggS"
    BMP: [0x42, 0x4D], // "BM"
};

// Lower-cased basenames of OpenClaw ecosystem files that need semantic analysis.
FileClassifier.SEMANTIC_FILENAMES = new Set([
    'skill.md',
    'hook.md',
    'agents.md',
    'soul.md',
    'bootstrap.md',
    'boot.md',
    'tools.md',
    'heartbeat.md',
    'identity.md',
    'user.md',
]);

// ── Content-based script detection patterns ────────────────────────
// Applied only to chunks that pass the plain-text check.
FileClassifier.SCRIPT_PATTERNS = [
    // Shebangs and language open tags
    /^#!.*\/(bash|sh|zsh|fish|dash)\b/m, // Unix shell shebang
    /^#!.*\/(python|ruby|perl|node)\b/m, // Interpreter shebang
    /^<\?php\b/m, // PHP open tag
    // Windows scripting
    /^\s*@echo\s+off\b/im, // Windows batch
    /^\s*Set-StrictMode\b/m, // PowerShell
    /^\s*\$ErrorActionPreference\b/m, // PowerShell
    /^\s*param\s*\(/m, // PowerShell param block
    /^\s*Function\s+\w+/im, // VBScript / PowerShell
    /^\s*Dim\s+\w+/m, // VBScript
    // Python / JavaScript / TypeScript
    /^\s*import\s+\w+/m, // Python / JS / TS
    /^\s*from\s+\w+\s+import\s/m, // Python
    /^\s*def\s+\w+\s*\(/m, // Python function
    /^\s*require\s*\(\s*['"][^'"]+['"]\s*\)/m, // Node.js CJS
    /^\s*export\s+(default\s+)?function\b/m, // JS/TS ESM
    /^\s*const\s+\w+\s*=\s*require\b/m, // Node.js CJS
    // Shell constructs (scripts without a shebang)
    /^\s*(?:if|while|until)\s+\[/m, // Shell conditionals: if [ ... ], while [ ... ]
    /^\s*for\s+\w+\s+in\s/m, // Shell for loop: for x in ...
    /^\s*case\s+.*\s+in\s*$/m, // Shell case: case $x in
    /^\s*(?:export|readonly)\s+\w+=/m, // Shell variable export/readonly
    /^\s*(?:source|\.)\s+[\/~"']/m, // Shell source: source /path or . /path
    /^\s*(?:exit|return)\s+\d/m, // Shell exit/return with code
];

// Number of distinct patterns that must match before text is treated as a script.
FileClassifier.SCRIPT_PATTERN_THRESHOLD = 2;

// Entry basenames inside a ZIP that mark an OpenClaw skill/hook package.
FileClassifier.ZIP_SKILL_MARKERS = [
    'skill.md',
    'hook.md',
    'agents.md',
    'soul.md',
];

// Entry-name suffixes inside a ZIP that mark a binary/executable archive.
FileClassifier.ZIP_EXEC_EXTENSIONS = [
    '.exe',
    '.dll',
    '.com',
    '.so',
    '.dylib',
    '.bin',
    '.elf',
    '.ps1',
    '.bat',
    '.cmd',
    '.vbs',
];

// Entry names/prefixes inside a ZIP that mark an Office OOXML document.
FileClassifier.ZIP_OOXML_MARKERS = [
    '[content_types].xml',
    'word/',
    'xl/',
    'ppt/',
    '_rels/',
    'docprops/',
];
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { SensitiveFilePolicy } from './scanner';
|
|
2
|
+
/**
 * User-supplied plugin settings; every field is optional.
 * NOTE(review): field meanings below are inferred from names only — confirm against index.js.
 */
interface VTSentinelConfig {
    /** Presumably the VirusTotal API key. */
    apiKey?: string;
    /** Presumably the directories watched for new or changed files. */
    watchDirs?: string[];
    /** Presumably toggles automatic scanning. */
    autoScan?: boolean;
    /** Presumably the largest file size, in megabytes, that will be handled. */
    maxFileSizeMb?: number;
    /** Policy value defined in `./scanner`. */
    sensitiveFilePolicy?: SensitiveFilePolicy;
}
|
|
9
|
+
/** Shape of the host API object passed to the plugin entry point. */
interface PluginApi {
    /** Leveled logging sink. */
    logger: {
        info: (msg: string) => void;
        warn: (msg: string) => void;
        error: (msg: string) => void;
    };
    /** Host configuration; plugin settings are nested under `plugins.entries[<key>].config`. */
    config?: {
        plugins?: {
            entries?: Record<string, {
                config?: VTSentinelConfig;
            }>;
        };
    };
    /** Register a service described by an id and start/stop callbacks. */
    registerService: (service: {
        id: string;
        start: () => void;
        stop: () => void;
    }) => void;
    /** Register a tool described by name, description, parameters and an async executor. */
    registerTool: (tool: {
        name: string;
        description: string;
        parameters: object;
        execute: (ctx: any, params: any) => Promise<any>;
    }) => void;
    /** Optional: attach an async handler to one or more named events. */
    registerHook?: (events: string | string[], handler: (event: any) => Promise<any>, opts?: object) => void;
    /** Optional: attach an async handler for tool-result events. */
    onToolResult?: (handler: (event: any) => Promise<any>) => void;
}
|
|
36
|
+
/**
 * Minimal semver comparison for plain `x.y.z` version strings
 * (pre-release tags are not handled).
 *
 * @param latest - Candidate version.
 * @param current - Version to compare against.
 * @returns `true` when `latest` is newer than `current`.
 */
export declare function isNewerVersion(latest: string, current: string): boolean;
|
|
41
|
+
/**
 * Path predicate.
 * NOTE(review): presumably reports whether `filePath` belongs to this plugin's
 * own files — confirm against index.js.
 */
export declare function isSelfPath(filePath: string): boolean;
|
|
42
|
+
/**
 * Plugin entry point (default export); receives the host's `PluginApi`.
 * NOTE(review): presumably registers the plugin's services, tools and hooks
 * through `api` — confirm against index.js.
 */
export default function vtSentinelPlugin(api: PluginApi): void;
|
|
43
|
+
export {};
|