@lowwattlabs/clawsec 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +223 -0
- package/api/public/index.html +87 -0
- package/api/src/badge.js +60 -0
- package/api/src/middleware.js +104 -0
- package/api/src/routes.js +184 -0
- package/api/src/server.js +58 -0
- package/api/src/verify-wrapper.sh +16 -0
- package/bin/clawsec-api.js +19 -0
- package/bin/clawsec.js +99 -0
- package/bin/setup-venv.js +35 -0
- package/cli/clawsec.py +263 -0
- package/lib/common/__init__.py +2 -0
- package/lib/common/colors.sh +17 -0
- package/lib/common/config.py +12 -0
- package/lib/common/config.sh +8 -0
- package/lib/common/log.sh +24 -0
- package/lib/common/utils.sh +69 -0
- package/lib/intel-sync/manifest.py +103 -0
- package/lib/intel-sync/sources/cisa-kev.sh +24 -0
- package/lib/intel-sync/sources/epss.sh +34 -0
- package/lib/intel-sync/sources/feodo.sh +27 -0
- package/lib/intel-sync/sources/malwarebazaar.sh +22 -0
- package/lib/intel-sync/sources/osv.sh +101 -0
- package/lib/intel-sync/sources/semgrep-rules.sh +28 -0
- package/lib/intel-sync/sources/threatfox.sh +28 -0
- package/lib/intel-sync/sources/urlhaus.sh +42 -0
- package/lib/intel-sync/sources/yara-rules.sh +38 -0
- package/lib/intel-sync/sync.sh +96 -0
- package/lib/skill-verify/checks/behavioral.py +252 -0
- package/lib/skill-verify/checks/dep-scan.py +456 -0
- package/lib/skill-verify/checks/ioc-match.py +382 -0
- package/lib/skill-verify/checks/prompt-inject.py +158 -0
- package/lib/skill-verify/checks/secret-scan.sh +61 -0
- package/lib/skill-verify/checks/static-analysis.sh +73 -0
- package/lib/skill-verify/checks/yara-scan.sh +73 -0
- package/lib/skill-verify/report.py +119 -0
- package/lib/skill-verify/verify.sh +326 -0
- package/package.json +42 -0
- package/requirements.txt +6 -0
- package/setup.sh +200 -0
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ⚡ Low Watt Labs
|
|
3
|
+
"""ClawSec v2 - IOC Extraction & Match
|
|
4
|
+
|
|
5
|
+
Extracts URLs, IPs, domains, hashes from skill code and matches against
|
|
6
|
+
URLhaus, ThreatFox, Feodo Tracker, and MalwareBazaar caches.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
import sys
|
|
13
|
+
import csv
|
|
14
|
+
import unicodedata
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from urllib.parse import urlparse
|
|
17
|
+
|
|
18
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', 'common'))
|
|
19
|
+
from config import INTEL_DIR
|
|
20
|
+
|
|
21
|
+
# Patterns for extraction.
# The IPv4 and domain expressions are assembled from named fragments; the
# compiled pattern strings are identical to writing each one as one literal.
_IPV4_OCTET = r'(?:25[0-5]|2[0-4]\d|1?\d?\d)'
_WATCHED_TLDS = r'com|net|org|io|dev|app|xyz|top|info|biz|cc|tk|ml|ga|cf|gq'

URL_PATTERN = re.compile(r'https?://[^\s"\'\]\)>}]+', re.IGNORECASE)
IP_PATTERN = re.compile(r'\b(?:' + _IPV4_OCTET + r'\.){3}' + _IPV4_OCTET + r'\b')
DOMAIN_PATTERN = re.compile(
    r'\b(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+(?:' + _WATCHED_TLDS + r')\b',
    re.IGNORECASE,
)
SHA256_PATTERN = re.compile(r'\b[a-fA-F0-9]{64}\b')
SHA1_PATTERN = re.compile(r'\b[a-fA-F0-9]{40}\b')
MD5_PATTERN = re.compile(r'\b[a-fA-F0-9]{32}\b')

# Known-safe domains to exclude
SAFE_DOMAINS = {
    # Code hosting and package registries
    "github.com",
    "githubusercontent.com",
    "npmjs.com",
    "npmjs.org",
    "pypi.org",
    "pythonhosted.org",
    "clawhub.ai",
    "openclaw.dev",
    # Large cloud / platform vendors
    "google.com",
    "cloud.google.com",
    "amazonaws.com",
    "amazon.com",
    "microsoft.com",
    "azure.com",
    "cloudflare.com",
    # Documentation placeholder and local hosts
    "example.com",
    "localhost",
    "127.0.0.1",
    "0.0.0.0",
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def normalize(text):
    """Return *text* NFKC-normalized with known homoglyphs folded to Latin.

    NFKC folds compatibility forms (fullwidth letters, ligatures) into their
    plain equivalents. Look-alike letters from other scripts (e.g. Cyrillic
    'а' vs Latin 'a') survive NFKC, so each character is additionally mapped
    through ``CONFUSABLES``.

    Non-string input is returned unchanged.
    """
    if isinstance(text, str):
        return unicodedata.normalize('NFKC', text).translate(_CONFUSABLE_TABLE)
    return text


# Common confusable character mapping (Cyrillic/Macedonian → Latin)
# Not exhaustive but covers the most-likely homoglyph attacks
CONFUSABLES = {
    # Cyrillic lowercase → Latin
    '\u0430': 'a',  # а → a
    '\u0435': 'e',  # е → e
    '\u043e': 'o',  # о → o
    '\u0440': 'p',  # р → p
    '\u0441': 'c',  # с → c
    '\u0443': 'y',  # у → y
    '\u0445': 'x',  # х → x
    '\u044b': 'b',  # ы → b (approximate)
    '\u0456': 'i',  # і → i (Ukrainian)
    '\u0458': 'j',  # ј → j (Macedonian)
    '\u0455': 's',  # ѕ → s (Macedonian)
    '\u0457': 'i',  # ї → i (Ukrainian)
    '\u0454': 'e',  # є → e (Ukrainian)
    # Cyrillic uppercase → Latin uppercase
    '\u0410': 'A',  # А → A
    '\u0412': 'B',  # В → B
    '\u0415': 'E',  # Е → E
    '\u041a': 'K',  # К → K
    '\u041c': 'M',  # М → M
    '\u041d': 'H',  # Н → H
    '\u041e': 'O',  # О → O
    '\u0420': 'P',  # Р → P
    '\u0421': 'C',  # С → C
    '\u0422': 'T',  # Т → T
    '\u0425': 'X',  # Х → X
    # Greek confusables
    '\u03b1': 'a',  # α → a
    '\u03b9': 'i',  # ι → i
    '\u03bf': 'o',  # ο → o
    '\u03c1': 'p',  # ρ → p
    '\u03c5': 'y',  # υ → y
    '\u03c7': 'x',  # χ → x
}

# Translation table derived from CONFUSABLES, consumed by normalize().
_CONFUSABLE_TABLE = str.maketrans(CONFUSABLES)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def is_private_ip(ip):
    """Return True when *ip* should be treated as non-routable.

    Covers 0.0.0.0/8, loopback 127.0.0.0/8 and the RFC 1918 ranges
    (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16). Input that does not have
    four dot-separated fields, or whose first two fields are not integers,
    is also reported as private.
    """
    octets = ip.split('.')
    if len(octets) != 4:
        return True
    try:
        first = int(octets[0])
        second = int(octets[1])
    except ValueError:
        return True
    return (
        first in (0, 10, 127)
        or (first == 172 and 16 <= second <= 31)
        or (first == 192 and second == 168)
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def check_intel_cache():
    """Report which intel sources are absent from the cache directory.

    Returns:
        ``["intel_cache_dir"]`` when ``INTEL_DIR`` itself is not a directory;
        otherwise the names of the sources whose expected path under
        ``INTEL_DIR`` does not exist (empty list when all are present).
        Only existence is checked, not file contents.
    """
    if not os.path.isdir(INTEL_DIR):
        return ["intel_cache_dir"]

    # (source name, path components relative to INTEL_DIR), in report order.
    expected = (
        ("urlhaus", ("urlhaus", "urls.csv")),
        ("malwarebazaar", ("malwarebazaar", "recent_hashes.csv")),
        ("feodo", ("feodo", "c2_ips.csv")),
        ("threatfox", ("threatfox",)),
    )
    return [
        source
        for source, parts in expected
        if not os.path.exists(os.path.join(INTEL_DIR, *parts))
    ]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def extract_iocs(skill_path):
    """Extract IOCs and homoglyph detections from all files in skill directory.

    Every regular file below *skill_path* (except a few binary/archive
    suffixes) is read as text, normalized with ``normalize()``, and scanned
    for URLs, public IPv4 addresses, domains and SHA-256 hashes.

    Args:
        skill_path: Directory to scan (``str`` or ``Path``).

    Returns:
        A ``(iocs, homoglyphs)`` tuple. ``iocs`` maps ``"urls"``, ``"ips"``,
        ``"domains"`` and ``"hashes"`` to sets of strings. ``homoglyphs`` is
        a list of ``{"type", "original", "normalized"}`` dicts, one per URL
        or domain whose text changed under normalization.
    """
    iocs = {"urls": set(), "ips": set(), "domains": set(), "hashes": set()}
    homoglyphs = []  # list of {original, normalized, type} dicts
    skill_path = Path(skill_path)

    # NOTE(review): '.node_modules' is compared against Path.suffix, so it
    # never excludes a node_modules/ directory. Left unchanged because
    # skipping that directory would reduce scan coverage — confirm intent.
    skipped_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.zip', '.gz', '.tar', '.node_modules')

    # Unicode-aware domain pattern for the raw (un-normalized) text:
    # DOMAIN_PATTERN only matches Latin characters and would miss
    # Cyrillic/Greek homoglyphs. Compiled once, not once per file.
    homoglyph_domain_re = re.compile(
        r'(?:[\w\u0100-\u024f\u0400-\u04ff](?:[\w\u0100-\u024f\u0400-\u04ff-]*[\w\u0100-\u024f\u0400-\u04ff])?\.)+(?:com|net|org|io|dev|app|xyz|top|info|biz|cc|tk|ml|ga|cf|gq)\b',
        re.IGNORECASE
    )
    url_host_re = re.compile(r'://([^/:]+)')

    for fpath in skill_path.rglob("*"):
        if fpath.is_dir():
            continue
        if fpath.suffix in skipped_suffixes:
            continue
        try:
            # Decode explicitly as UTF-8: homoglyph detection depends on
            # non-ASCII letters surviving the read, which the platform
            # default encoding does not guarantee.
            raw_content = fpath.read_text(encoding='utf-8', errors='ignore')
        except (OSError, ValueError):
            # Unreadable entry (permissions, special file, ...): best effort.
            continue
        content = normalize(raw_content)  # P0-6: normalize before extraction

        # P0: Detect homoglyph substitution — flag when normalization changed text
        if raw_content != content:
            for url in URL_PATTERN.findall(raw_content):
                norm_url = normalize(url)
                if url != norm_url:
                    homoglyphs.append({
                        "type": "homoglyph_url",
                        "original": url,
                        "normalized": norm_url,
                    })
            for dom in homoglyph_domain_re.findall(raw_content):
                norm_dom = normalize(dom)
                # Flag ANY domain where normalization changed characters,
                # even if the normalized version is a "safe" domain.
                # A homoglyph of google.com IS the attack — that's the whole point.
                if dom != norm_dom:
                    homoglyphs.append({
                        "type": "homoglyph_domain",
                        "original": dom,
                        "normalized": norm_dom,
                    })

        for url in URL_PATTERN.findall(content):
            # Strip trailing punctuation
            url = url.rstrip('.,;:)>')
            iocs["urls"].add(url)
            # Extract domain from URL
            m = url_host_re.search(url)
            if m:
                iocs["domains"].add(m.group(1).lower())

        for ip in IP_PATTERN.findall(content):
            if not is_private_ip(ip):
                iocs["ips"].add(ip)

        for dom in DOMAIN_PATTERN.findall(content):
            if dom.lower() not in SAFE_DOMAINS:
                iocs["domains"].add(dom.lower())

        for h in SHA256_PATTERN.findall(content):
            # Filter out obviously non-hash strings (e.g. 64 repeated hex
            # digits). The check is case-insensitive so that an uppercase
            # low-entropy run is filtered just like a lowercase one.
            if len(set(h.lower())) > 4:
                iocs["hashes"].add(h.lower())

    return iocs, homoglyphs
|
|
204
|
+
|
|
205
|
+
def load_feodo_ips():
    """Load Feodo Tracker C2 IPs.

    Reads ``<INTEL_DIR>/feodo/c2_ips.csv``. Rows that are empty, whose first
    column is empty, or whose first column starts with ``#`` are skipped.
    The address is taken from column index 1 when the row has at least two
    columns, otherwise from column 0, and is kept only if the whole field is
    an IPv4 address.

    Returns:
        A set of IPv4 address strings; empty when the file does not exist.
    """
    path = os.path.join(INTEL_DIR, "feodo", "c2_ips.csv")
    ips = set()
    if not os.path.exists(path):
        return ips
    # errors='ignore' matches the other feed loaders: a stray undecodable
    # byte in the feed must not abort the whole check.
    with open(path, errors='ignore') as f:
        for row in csv.reader(f):
            if not row:
                continue
            first = row[0]
            if not first or first.startswith("#"):
                continue
            # IP is in column index 1 (dst_ip) for Feodo CSV format
            raw_ip = row[1] if len(row) >= 2 else first
            ip = normalize(raw_ip.strip())  # P0-6
            # fullmatch: a prefix match would also accept e.g. "1.2.3.4.5".
            if IP_PATTERN.fullmatch(ip):
                ips.add(ip)
    return ips
|
|
227
|
+
|
|
228
|
+
def load_urlhaus_urls():
    """Load URLhaus malicious URLs using proper CSV parsing.

    Reads ``<INTEL_DIR>/urlhaus/urls.csv`` and collects column index 2
    (id, dateadded, url, ...) of every row that is non-empty, does not start
    with ``#`` and has at least three columns. Each URL is stripped,
    normalized and lower-cased.

    Returns:
        A set of lower-cased URL strings; empty when the file does not exist.
    """
    csv_path = os.path.join(INTEL_DIR, "urlhaus", "urls.csv")
    if not os.path.exists(csv_path):
        return set()

    malicious = set()
    with open(csv_path, errors='ignore') as handle:
        for record in csv.reader(handle):
            # Blank rows, "#" comment/header rows and short rows carry no URL.
            if not record or record[0].startswith("#") or len(record) < 3:
                continue
            candidate = normalize(record[2].strip())  # P0-6: normalize at load
            if candidate:
                malicious.add(candidate.lower())
    return malicious
|
|
248
|
+
|
|
249
|
+
def load_malwarebazaar_hashes():
    """Load MalwareBazaar SHA256 hashes using proper CSV parsing.

    Reads ``<INTEL_DIR>/malwarebazaar/recent_hashes.csv`` with ``csv.reader``
    so quoted fields containing spaces or commas are handled (P0-3). The hash
    is taken from column index 1 (first_seen_utc, sha256_hash, ...), stripped
    of whitespace and surrounding quotes, lower-cased, normalized, and kept
    only when it is 64 lowercase hex characters.

    Returns:
        A set of lowercase SHA-256 hex strings; empty when the file does not
        exist.
    """
    csv_path = os.path.join(INTEL_DIR, "malwarebazaar", "recent_hashes.csv")
    if not os.path.exists(csv_path):
        return set()

    known = set()
    with open(csv_path, errors='ignore') as handle:
        for record in csv.reader(handle):
            # Rows without a second column cannot carry a hash.
            if len(record) < 2:
                continue
            lead = record[0].strip()
            if not lead or lead.startswith("#"):
                continue
            digest = record[1].strip().strip("'").strip('"').lower()  # P0-3: strip quotes
            digest = normalize(digest)  # P0-6
            if re.match(r'^[a-f0-9]{64}$', digest):
                known.add(digest)
    return known
|
|
274
|
+
|
|
275
|
+
def _url_match_key(url):
    """Return the (scheme, hostname, path) comparison key for *url*.

    A trailing ``/`` is ignored. Returns ``None`` when ``urlparse`` rejects
    the input (it raises ``ValueError`` for e.g. a malformed bracketed host).
    """
    try:
        parsed = urlparse(url.rstrip('/'))
        return (parsed.scheme, parsed.hostname, parsed.path)
    except ValueError:
        return None


def check_ioc_match(skill_path):
    """Main check: extract IOCs, match against threat intel.

    Args:
        skill_path: Directory of the skill to scan.

    Returns:
        A result dict with keys ``check``, ``status`` (``"pass"``, ``"warn"``
        or ``"fail"``), ``findings``, ``errors`` and ``extracted`` (counts of
        extracted URLs, IPs, domains and hashes). Status is ``"fail"`` when
        the intel directory is missing or any non-``intel_missing`` critical
        finding exists, ``"warn"`` when there are other findings, else
        ``"pass"``.
    """
    results = {
        "check": "ioc_match",
        "status": "pass",
        "findings": [],
        "errors": [],
        "extracted": {"urls": 0, "ips": 0, "domains": 0, "hashes": 0}
    }

    # P0-4: Validate intel cache before proceeding
    missing = check_intel_cache()
    for source in missing:
        results["findings"].append({
            "category": "intel_missing",
            "severity": "critical",
            "description": f"Required intel source {source} is missing or corrupt. Results may be incomplete."
        })
    if not os.path.isdir(INTEL_DIR):
        results["status"] = "fail"
        results["errors"].append("intel cache directory missing")
        return results

    iocs, homoglyphs = extract_iocs(skill_path)

    # Emit homoglyph detections as critical findings
    for hg in homoglyphs:
        results["findings"].append({
            "type": hg["type"],
            "category": "ioc_match",
            "severity": "critical",
            "description": f"Homoglyph detected: '{hg['original']}' normalizes to '{hg['normalized']}' — potential phishing/poisoning attack",
            "original": hg["original"],
            "normalized": hg["normalized"],
            "source": "homoglyph_detection"
        })
    results["extracted"] = {
        "urls": len(iocs["urls"]),
        "ips": len(iocs["ips"]),
        "domains": len(iocs["domains"]),
        "hashes": len(iocs["hashes"])
    }

    # Load threat intel
    feodo_ips = load_feodo_ips()
    urlhaus_urls = load_urlhaus_urls()
    mb_hashes = load_malwarebazaar_hashes()

    # Match IPs against Feodo
    for ip in iocs["ips"]:
        if ip in feodo_ips:
            results["findings"].append({
                "type": "ip",
                "value": ip,
                "source": "feodo_tracker",
                "severity": "critical",
                "description": f"IP {ip} listed in Feodo Tracker C2 blocklist"
            })

    # Match URLs against URLhaus (exact scheme+host+path comparison).
    # The feed is parsed once into a set of keys, so each skill URL costs one
    # lookup instead of a re-parse of the entire feed. Entries that cannot be
    # parsed are dropped rather than aborting the scan.
    bad_keys = {_url_match_key(bad_url) for bad_url in urlhaus_urls}
    bad_keys.discard(None)
    for url in iocs["urls"]:
        skill_key = _url_match_key(url.lower())
        if skill_key is not None and skill_key in bad_keys:
            results["findings"].append({
                "type": "url",
                "value": url,
                "source": "urlhaus",
                "severity": "critical",
                "description": "URL matches URLhaus malicious URL list"
            })

    # Match hashes against MalwareBazaar
    for h in iocs["hashes"]:
        if h in mb_hashes:
            results["findings"].append({
                "type": "hash",
                "value": h[:16] + "...",
                "source": "malwarebazaar",
                "severity": "critical",
                "description": "SHA256 hash matches MalwareBazaar sample"
            })

    # Determine status. Missing intel alone only downgrades to "warn": the
    # scan still ran with the data available, but results are incomplete.
    if results["findings"]:
        has_critical = any(
            f["severity"] == "critical" and f.get("category") != "intel_missing"
            for f in results["findings"]
        )
        results["status"] = "fail" if has_critical else "warn"

    return results
|
|
376
|
+
|
|
377
|
+
if __name__ == "__main__":
    # CLI entry point: one positional argument (the skill directory); the
    # result dict is printed as indented JSON.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: ioc-match.py <skill_path>")
        sys.exit(1)
    print(json.dumps(check_ioc_match(cli_args[0]), indent=2))
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ⚡ Low Watt Labs
|
|
3
|
+
"""ClawSec v2 - Prompt Injection Pattern Detection
|
|
4
|
+
|
|
5
|
+
Scans SKILL.md and config files for instruction override, role manipulation,
|
|
6
|
+
and safety bypass attempts that could compromise an agent using the skill.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
# Patterns indicating prompt injection attempts
|
|
16
|
+
INJECTION_PATTERNS = [
    # Each entry is (regex, description, severity).

    # Direct instruction overrides
    (r'(?i)ignore\s+(all\s+)?previous\s+(instructions?|prompts?|rules?)',
     "Attempts to override previous instructions",
     "critical"),
    (r'(?i)forget\s+(all\s+)?previous\s+(instructions?|rules?)',
     "Attempts to make agent forget instructions",
     "critical"),
    (r'(?i)disregard\s+(all\s+)?(previous|above|safety)\s+(instructions?|rules?|guidelines?)',
     "Attempts to disregard safety rules",
     "critical"),
    (r'(?i)override\s+(safety|security|guardrails?|filters?)',
     "Attempts to override safety controls",
     "critical"),

    # Role manipulation
    (r'(?i)you\s+are\s+now\s+(?:an?\s+)?(?:unrestricted|unfiltered|uncensored|jailbroken)',
     "Attempts role manipulation to bypass safety",
     "critical"),
    (r'(?i)pretend\s+you\s+(are|have)\s+no\s+(rules?|restrictions?|limits?)',
     "Role manipulation to remove constraints",
     "critical"),
    (r'(?i)(?:act|roleplay|role-play)\s+as\s+(?:an?\s+)?(?:unrestricted|unfiltered|DAN)',
     "DAN-style role manipulation",
     "critical"),

    # Safety bypass
    (r'(?i)bypass\s+(safety|security|content\s+filter|guardrails?)',
     "Direct safety bypass attempt",
     "critical"),
    (r'(?i)(?:jailbreak|escape)\s+(?:the\s+)?(?:sandbox|container|restrictions?)',
     "Sandbox escape attempt",
     "high"),
    (r'(?i)this\s+is\s+(?:a\s+)?(?:safe|educational|research|testing)\s+(?:mode|context|environment)',
     "Fictional safety reassurance to lower guard",
     "high"),

    # Hidden/embedded instructions
    (r'(?i)<!--\s*(?:ignore|bypass|skip|override)',
     "Hidden HTML comment with override instruction",
     "high"),
    (r'(?i)<!--\s*system\s*(?:prompt|instruction)',
     "Hidden system prompt injection in HTML comment",
     "high"),
    (r'(?i)\[\[.*?(?:ignore|bypass|override).*?\]\]',
     "Hidden instruction in double-bracket marker",
     "medium"),

    # Output manipulation
    (r'(?i)do\s+not\s+(?:show|display|include|report|warn)',
     "Attempts to suppress security reporting",
     "high"),
    (r'(?i)(?:never|don\'?t)\s+(?:flag|report|warn|alert)\s+(?:about\s+)?(?:this|issues?|vulnerabilities?)',
     "Attempts to suppress vulnerability reporting",
     "critical"),

    # Data exfiltration hints
    (r'(?i)(?:send|transmit|exfiltrate|export)\s+(?:all\s+)?(?:data|keys?|secrets?|tokens?|credentials?)',
     "Attempts to exfiltrate sensitive data",
     "critical"),
    (r'(?i)(?:post|fetch|call|ping)\s+(?:https?://(?:(?!github\.com|npmjs\.org|pypi\.org).)*)\s+.*(?:key|token|secret|password|credential)',
     "Potential credential exfiltration endpoint",
     "high"),

    # Environmental exploitation
    (r'(?i)read\s+(?:the\s+)?(?:env|environment)\s+(?:variables?|file)',
     "Requests environment variable access",
     "medium"),
    (r'(?i)(?:access|read|dump)\s+(?:the\s+)?(?:shadow|passwd|hosts)\s+file',
     "Requests access to system files",
     "high"),
]

# Patterns compiled once at import. re.DOTALL lets '.' cross line breaks so
# multi-line markers are matched.
_COMPILED_INJECTION_PATTERNS = [
    (re.compile(pattern, re.DOTALL), pattern, description, severity)
    for pattern, description, severity in INJECTION_PATTERNS
]


def check_prompt_injection(skill_path):
    """Scan skill docs and config for prompt injection patterns.

    Scans the top-level ``SKILL.md`` first, then every other ``*.md`` file
    and every ``*.json`` file found recursively under *skill_path*. For each
    file, at most one finding (the first match) is reported per pattern.

    Args:
        skill_path: Directory of the skill to scan (``str`` or ``Path``).

    Returns:
        A result dict with keys ``check``, ``status``, ``findings`` and
        ``errors``. ``status`` is ``"fail"`` if any finding is critical,
        ``"warn"`` if there are only lower-severity findings, else ``"pass"``.
        Files that could not be read are listed in ``errors``.
    """
    results = {
        "check": "prompt_injection",
        "status": "pass",
        "findings": [],
        "errors": []
    }

    skill_path = Path(skill_path)
    scan_files = []

    def queue_file(fpath, label):
        # Read one file for scanning; record unreadable files instead of
        # crashing or dropping them silently.
        try:
            scan_files.append((label, fpath.read_text(encoding='utf-8', errors='ignore')))
        except OSError as exc:
            results["errors"].append(f"could not read {label}: {exc}")

    # Primary target: SKILL.md
    skill_md = skill_path / "SKILL.md"
    if skill_md.is_file():
        queue_file(skill_md, "SKILL.md")

    # Also scan README.md, any .md in the dir. Only the top-level SKILL.md
    # queued above is skipped; a SKILL.md in a subdirectory is still scanned.
    for fpath in skill_path.rglob("*.md"):
        if fpath == skill_md or not fpath.is_file():
            continue
        queue_file(fpath, str(fpath.relative_to(skill_path)))

    # Also check any config files
    for fpath in skill_path.rglob("*.json"):
        if not fpath.is_file():
            continue
        queue_file(fpath, str(fpath.relative_to(skill_path)))

    for filename, content in scan_files:
        for regex, pattern, description, severity in _COMPILED_INJECTION_PATTERNS:
            # One finding per pattern+file: only the first match is reported.
            match = regex.search(content)
            if match is None:
                continue
            # Get context around the match
            start = max(0, match.start() - 40)
            end = min(len(content), match.end() + 40)
            context = content[start:end].replace('\n', ' ').strip()
            results["findings"].append({
                "type": "prompt_injection",
                "pattern": pattern,
                "description": description,
                "severity": severity,
                "file": filename,
                "context": f"...{context}..."
            })

    # Determine status
    if any(f["severity"] == "critical" for f in results["findings"]):
        results["status"] = "fail"
    elif results["findings"]:
        results["status"] = "warn"

    return results
|
|
152
|
+
|
|
153
|
+
if __name__ == "__main__":
    # CLI entry point: one positional argument (the skill directory); the
    # result dict is printed as indented JSON.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: prompt-inject.py <skill_path>")
        sys.exit(1)
    print(json.dumps(check_prompt_injection(cli_args[0]), indent=2))
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# ⚡ Low Watt Labs — ClawSec
# ClawSec v2 - Secret Scan (Gitleaks)
#
# Usage: secret-scan.sh <skill_path>
#
# Runs gitleaks over <skill_path>, then a grep-based credential-pattern
# heuristic, and prints a single JSON object on stdout:
#   {"check":"secret_scan","status":...,"findings":[...],"errors":[...]}
# Status is "warn" when gitleaks is not installed, "fail" when either stage
# reports a finding, otherwise "pass".
set -euo pipefail

source "$(dirname "$0")/../../common/colors.sh"

skill_path="${1:?Usage: secret-scan.sh <skill_path>}"
results='{"check":"secret_scan","status":"pass","findings":[],"errors":[]}'

if ! command -v gitleaks &>/dev/null; then
    echo '{"check":"secret_scan","status":"warn","findings":[],"errors":["gitleaks not installed — results may be incomplete"]}'
    exit 0
fi

# Run gitleaks on the skill directory.
# The template ends in the X's: a suffix after them is not randomised by
# every mktemp implementation. The trap removes the file on any exit path,
# including an early abort under `set -e`.
tmpout=$(mktemp "${TMPDIR:-/tmp}/gitleaks.XXXXXX")
trap 'rm -f "$tmpout"' EXIT

# gitleaks detect with no-git flag for untracked dirs
gitleaks detect --source "$skill_path" --no-git --report-format json --report-path "$tmpout" 2>/dev/null || true

if [[ -s "$tmpout" ]] && jq empty "$tmpout" 2>/dev/null; then
    count=$(jq 'length' "$tmpout")
    if [[ "$count" -gt 0 ]]; then
        findings=$(jq '[.[] | {
            rule: .RuleID,
            description: .Description,
            file: .File,
            line: .StartLine,
            match: .Match,
            severity: "high"
        }]' "$tmpout")
        results=$(jq -n --argjson findings "$findings" --arg count "$count" \
            '{check:"secret_scan",status:"fail",findings:$findings,errors:[],total:$count}')
    fi
fi

# P1: Supplement gitleaks with credential-pattern heuristic.
# Gitleaks skips well-known example keys (AKIAIOSFODNN7EXAMPLE, etc.),
# but those in a skill package are still suspicious — they indicate hardcoded
# credential patterns that could be swapped for real keys.
# Each pattern runs separately to avoid bash multiline regex issues.

cred_findings='[]'
for pattern in 'AKIA[0-9A-Z]{16}' '(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{25,}' '(?:sk|pk)_(?:live|test)_[A-Za-z0-9]{24,}' 'xox[bpas]-[A-Za-z0-9-]{10,}'; do
    while IFS= read -r match; do
        [[ -z "$match" ]] && continue
        new_finding=$(jq -n --arg m "$match" --arg p "$pattern" \
            '{"type":"credential_pattern","description":("Hardcoded credential pattern detected: " + $m),"severity":"high","source":"credential_heuristic","pattern":$p}')
        cred_findings=$(echo "$cred_findings" | jq --argjson n "$new_finding" '. + [$n]')
    # -o prints only the matched text and -h drops the file name, so $match
    # is exactly the credential-shaped token rather than a fragment of the
    # surrounding line.
    done < <(grep -rhoPa -- "$pattern" "$skill_path" 2>/dev/null || true)
done

if [[ "$cred_findings" != '[]' ]]; then
    existing=$(echo "$results" | jq '.findings // []')
    merged=$(jq -n --argjson a "$existing" --argjson b "$cred_findings" '$a + $b')
    results=$(echo "$results" | jq --argjson findings "$merged" '.findings = $findings | .status = "fail"')
fi

echo "$results"
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# ⚡ Low Watt Labs — ClawSec
# ClawSec v2 - Static Analysis (Semgrep)
#
# Usage: static-analysis.sh <skill_path>
#
# Runs semgrep over <skill_path> and prints a single JSON object on stdout:
#   {"check":"static_analysis","status":...,"findings":[...],"errors":[...]}
# Status is "fail" when any finding has severity ERROR, "warn" when any has
# severity WARNING or when semgrep could not produce usable output,
# otherwise "pass".
set -euo pipefail

source "$(dirname "$0")/../../common/config.sh"
source "$(dirname "$0")/../../common/colors.sh"

INTEL_DIR="${CLAWSEC_INTEL_DIR}"
SEMRULES_DIR="${INTEL_DIR}/semgrep-rules/repo"

skill_path="${1:?Usage: static-analysis.sh <skill_path>}"
results='{"check":"static_analysis","status":"pass","findings":[],"errors":[]}'

if ! command -v semgrep &>/dev/null; then
    echo '{"check":"static_analysis","status":"warn","findings":[],"errors":["semgrep not installed — results may be incomplete"]}'
    exit 0
fi

if [[ ! -d "$SEMRULES_DIR" ]]; then
    echo '{"check":"static_analysis","status":"warn","findings":[],"errors":["semgrep rules not synced — results may be incomplete"]}'
    exit 0
fi

# The template ends in the X's: a suffix after them is not randomised by
# every mktemp implementation. The trap removes the file on any exit path.
tmpout=$(mktemp "${TMPDIR:-/tmp}/semgrep.XXXXXX")
trap 'rm -f "$tmpout"' EXIT

# Use --config auto for speed (community rules, pre-bundled)
# NOTE(review): SEMRULES_DIR is required to exist above but is not passed to
# semgrep here — confirm whether the synced rules were meant to be used.
timeout 30 semgrep --config auto \
    --json \
    --timeout 10 \
    --max-target-bytes 500000 \
    --quiet \
    "$skill_path" > "$tmpout" 2>/dev/null || true

# Require non-empty output with a .results array. An empty file (semgrep
# timed out or crashed) must not be reported as a clean pass.
if [[ -s "$tmpout" ]] && jq -e '.results | type == "array"' "$tmpout" >/dev/null 2>&1; then
    count=$(jq '.results | length' "$tmpout")
    if [[ "$count" -gt 0 ]]; then
        findings=$(jq '[.results[] | {
            rule_id: .check_id,
            message: .extra.message,
            severity: .extra.severity,
            file: .path,
            line: .start.line
        }]' "$tmpout")

        # P0-7: Escalate injection/traversal/secret findings to critical.
        # Operates on the findings extracted above, so the escalation and
        # the counts below see the real semgrep results.
        findings=$(jq '
            map(
                if (.rule_id // "") | test("command.injection|shell.injection|os.system|path.traversal|hardcoded.secret|secret.in.code|insecure-exec") then
                    .severity = "ERROR"
                elif (.rule_id // "") | test("sql.injection|xss|csrf|cors") then
                    .severity = "WARNING"
                else . end
            )
        ' <<<"$findings")

        # Recount escalation levels
        crit=$(jq '[.[] | select(.severity == "ERROR")] | length' <<<"$findings")
        warn=$(jq '[.[] | select(.severity == "WARNING")] | length' <<<"$findings")

        if [[ "$crit" -gt 0 ]]; then status="fail"
        elif [[ "$warn" -gt 0 ]]; then status="warn"
        else status="pass"; fi

        results=$(jq -n --argjson findings "$findings" --arg status "$status" --arg count "$count" \
            '{check:"static_analysis",status:$status,findings:$findings,errors:[],total:$count}')
    fi
else
    results='{"check":"static_analysis","status":"warn","findings":[],"errors":["semgrep output parse error — results may be incomplete"]}'
fi

echo "$results"
|