@lowwattlabs/clawsec 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +223 -0
  3. package/api/public/index.html +87 -0
  4. package/api/src/badge.js +60 -0
  5. package/api/src/middleware.js +104 -0
  6. package/api/src/routes.js +184 -0
  7. package/api/src/server.js +58 -0
  8. package/api/src/verify-wrapper.sh +16 -0
  9. package/bin/clawsec-api.js +19 -0
  10. package/bin/clawsec.js +99 -0
  11. package/bin/setup-venv.js +35 -0
  12. package/cli/clawsec.py +263 -0
  13. package/lib/common/__init__.py +2 -0
  14. package/lib/common/colors.sh +17 -0
  15. package/lib/common/config.py +12 -0
  16. package/lib/common/config.sh +8 -0
  17. package/lib/common/log.sh +24 -0
  18. package/lib/common/utils.sh +69 -0
  19. package/lib/intel-sync/manifest.py +103 -0
  20. package/lib/intel-sync/sources/cisa-kev.sh +24 -0
  21. package/lib/intel-sync/sources/epss.sh +34 -0
  22. package/lib/intel-sync/sources/feodo.sh +27 -0
  23. package/lib/intel-sync/sources/malwarebazaar.sh +22 -0
  24. package/lib/intel-sync/sources/osv.sh +101 -0
  25. package/lib/intel-sync/sources/semgrep-rules.sh +28 -0
  26. package/lib/intel-sync/sources/threatfox.sh +28 -0
  27. package/lib/intel-sync/sources/urlhaus.sh +42 -0
  28. package/lib/intel-sync/sources/yara-rules.sh +38 -0
  29. package/lib/intel-sync/sync.sh +96 -0
  30. package/lib/skill-verify/checks/behavioral.py +252 -0
  31. package/lib/skill-verify/checks/dep-scan.py +456 -0
  32. package/lib/skill-verify/checks/ioc-match.py +382 -0
  33. package/lib/skill-verify/checks/prompt-inject.py +158 -0
  34. package/lib/skill-verify/checks/secret-scan.sh +61 -0
  35. package/lib/skill-verify/checks/static-analysis.sh +73 -0
  36. package/lib/skill-verify/checks/yara-scan.sh +73 -0
  37. package/lib/skill-verify/report.py +119 -0
  38. package/lib/skill-verify/verify.sh +326 -0
  39. package/package.json +42 -0
  40. package/requirements.txt +6 -0
  41. package/setup.sh +200 -0
@@ -0,0 +1,456 @@
1
+ #!/usr/bin/env python3
2
+ # ⚡ Low Watt Labs
3
+ """ClawSec v2 - Dependency Scan
4
+
5
+ Scans skill dependencies against local OSV + CISA KEV + EPSS caches.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import re
11
+ import sys
12
+ import unicodedata
13
+ from pathlib import Path
14
+ from packaging.version import Version, InvalidVersion
15
+
16
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', 'common'))
17
+ from config import INTEL_DIR
18
+
19
def load_cisa_kev():
    """Return the set of CVE IDs found in the local CISA KEV catalog.

    The catalog is read from
    ``<INTEL_DIR>/cisa-kev/known_exploited_vulnerabilities.json``.
    When that file does not exist an empty set is returned. Entries
    without a ``cveID`` key contribute an empty string to the set.
    """
    catalog_file = os.path.join(INTEL_DIR, "cisa-kev", "known_exploited_vulnerabilities.json")
    if os.path.exists(catalog_file):
        with open(catalog_file) as handle:
            catalog = json.load(handle)
        entries = catalog.get("vulnerabilities", [])
        return set(entry.get("cveID", "") for entry in entries)
    return set()
27
+
28
def load_epss():
    """Read the local EPSS score file into a lookup table.

    Returns:
        dict mapping the first CSV column (the CVE ID) to a
        ``(probability, percentile)`` tuple of floats, taken from the
        second and third columns. Empty when
        ``<INTEL_DIR>/epss/epss_scores-current.csv`` does not exist.
    """
    scores = {}
    csv_file = os.path.join(INTEL_DIR, "epss", "epss_scores-current.csv")
    if not os.path.exists(csv_file):
        return scores

    with open(csv_file) as handle:
        for raw in handle:
            row = raw.strip()
            # Lines beginning with "#" or "cve" are comment/header rows.
            if row.startswith(("#", "cve")):
                continue
            fields = row.split(",")
            if len(fields) < 3:
                continue
            try:
                scores[fields[0]] = (float(fields[1]), float(fields[2]))
            except (ValueError, IndexError):
                # Non-numeric score columns: skip the row.
                continue
    return scores
46
+
47
def load_osv_ecosystem(ecosystem):
    """Load flattened OSV advisory records for one ecosystem.

    Looks in ``<INTEL_DIR>/osv/<ecosystem>``. If an ``index.json`` is
    present and readable, loading is delegated to ``_load_osv_via_index``;
    otherwise every ``*.json`` file in the directory is parsed (slow).

    Args:
        ecosystem: Name of the ecosystem sub-directory (e.g. "npm", "PyPI").

    Returns:
        A list with one dict per (advisory, affected package) pair, with
        keys ``id``, ``summary``, ``cve_ids``, ``package``, ``ecosystem``,
        ``ranges``, ``versions`` and ``severity``. Empty when the ecosystem
        directory does not exist.
    """
    eco_dir = os.path.join(INTEL_DIR, "osv", ecosystem)
    if not os.path.isdir(eco_dir):
        return []

    # Try loading via index for fast lookup
    index_path = os.path.join(eco_dir, "index.json")
    if os.path.exists(index_path):
        try:
            with open(index_path) as f:
                index = json.load(f)
            return _load_osv_via_index(eco_dir, index)
        except (json.JSONDecodeError, OSError):
            # Corrupt or unreadable index: fall through to the full scan.
            pass

    # Fallback: iterate all advisory files in the directory.
    advisories = []
    for fname in os.listdir(eco_dir):
        if not fname.endswith(".json") or fname == "index.json":
            continue
        fpath = os.path.join(eco_dir, fname)
        try:
            with open(fpath) as f:
                adv = json.load(f)
        except (ValueError, OSError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError.
            # A single unreadable advisory must not abort the whole scan.
            continue
        if not isinstance(adv, dict):
            continue

        cve_ids = [a for a in adv.get("aliases", []) if a.startswith("CVE-")]
        severity = adv.get("database_specific", {}).get("severity", "")
        for affected in adv.get("affected", []):
            pkg = affected.get("package", {})
            name = pkg.get("name", "")
            if not name:
                continue
            advisories.append({
                "id": adv.get("id", ""),
                "summary": adv.get("summary", ""),
                "cve_ids": list(cve_ids),
                "package": name,
                "ecosystem": pkg.get("ecosystem", ""),
                "ranges": affected.get("ranges", []),
                "versions": affected.get("versions", []),
                "severity": severity,
            })
    return advisories
95
+
96
+
97
def _load_osv_via_index(eco_dir, index):
    """Load every OSV advisory referenced by the pre-built package index.

    Args:
        eco_dir: Directory holding the advisory JSON files.
        index: Mapping of package name -> list of advisory filenames
            (relative to ``eco_dir``).

    Returns:
        A list with one dict per (advisory, affected package) pair, with
        keys ``id``, ``summary``, ``cve_ids``, ``package``, ``ecosystem``,
        ``ranges``, ``versions`` and ``severity``.

    Each advisory file is processed exactly once, even when the index
    lists it under several package names. Otherwise every affected entry
    would be emitted once per reference and produce duplicate findings.
    Files that cannot be read or parsed are skipped.
    """
    advisories = []
    seen_files = set()

    for fnames in index.values():
        for fname in fnames:
            if fname in seen_files:
                continue
            seen_files.add(fname)

            fpath = os.path.join(eco_dir, fname)
            try:
                with open(fpath) as f:
                    adv = json.load(f)
            except (json.JSONDecodeError, KeyError, OSError):
                continue

            for affected in adv.get("affected", []):
                pkg = affected.get("package", {})
                name = pkg.get("name", "")
                if not name:
                    continue
                advisories.append({
                    "id": adv.get("id", ""),
                    "summary": adv.get("summary", ""),
                    "cve_ids": [a for a in adv.get("aliases", []) if a.startswith("CVE-")],
                    "package": name,
                    "ecosystem": pkg.get("ecosystem", ""),
                    "ranges": affected.get("ranges", []),
                    "versions": affected.get("versions", []),
                    "severity": adv.get("database_specific", {}).get("severity", ""),
                })

    return advisories
138
+
139
def parse_skill_deps(skill_path):
    """Parse declared dependencies from a skill directory.

    Three optional files are inspected:

    * ``package.json`` - ``dependencies`` and ``devDependencies`` (npm);
      on a name clash the ``devDependencies`` version wins.
    * ``requirements.txt`` - ``name<op>version`` lines (PyPI). Dotted
      names and extras (``pkg[extra]==1.0``) are recognised.
    * ``SKILL.md`` - bullet items under a ``## Dependencies`` heading
      (ecosystem reported as "unknown").

    The skill content is untrusted, so malformed or unreadable files are
    skipped rather than raising. Text is decoded as UTF-8 with
    replacement of invalid bytes.

    Args:
        skill_path: Path (str or Path) to the skill directory.

    Returns:
        List of ``{"name", "version", "ecosystem"}`` dicts, in the order
        package.json, requirements.txt, SKILL.md.
    """
    deps = []
    skill_path = Path(skill_path)

    # Check for package.json
    pkg_json = skill_path / "package.json"
    if pkg_json.exists():
        try:
            with open(pkg_json, encoding="utf-8", errors="replace") as f:
                pkg = json.load(f)
        except (json.JSONDecodeError, OSError):
            pkg = None
        if isinstance(pkg, dict):
            merged = {}
            for section in ("dependencies", "devDependencies"):
                entries = pkg.get(section)
                # A null or non-object section must not abort the scan.
                if isinstance(entries, dict):
                    merged.update(entries)
            for name, ver in merged.items():
                deps.append({"name": name, "version": ver, "ecosystem": "npm"})

    # Check for requirements.txt
    req_file = skill_path / "requirements.txt"
    if req_file.exists():
        # name, optional [extras], comparison operator, numeric version.
        req_pattern = re.compile(
            r'^([A-Za-z0-9_][A-Za-z0-9_.-]*)\s*(?:\[[^\]]*\])?\s*[=<>!~]+\s*([0-9][0-9.]*)'
        )
        try:
            with open(req_file, encoding="utf-8", errors="replace") as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith("#"):
                        continue
                    m = req_pattern.match(line)
                    if m:
                        deps.append({"name": m.group(1), "version": m.group(2), "ecosystem": "PyPI"})
        except OSError:
            pass

    # Check SKILL.md for declared dependencies
    skill_md = skill_path / "SKILL.md"
    if skill_md.exists():
        try:
            with open(skill_md, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            content = ""
        # Body of the "## Dependencies" section, up to the next "##" heading.
        dep_section = re.search(r'##\s*Dependencies?\s*\n(.*?)(?=\n##|\Z)', content, re.DOTALL | re.IGNORECASE)
        if dep_section:
            for line in dep_section.group(1).split("\n"):
                m = re.match(r'-\s+([a-zA-Z0-9_/-]+)\s*[@=<>~]+\s*([0-9][0-9.]*)', line)
                if m:
                    deps.append({"name": m.group(1), "version": m.group(2), "ecosystem": "unknown"})

    return deps
182
+
183
def version_in_range(ver_str, ranges_list):
    """Check whether a version falls inside any vulnerable OSV range.

    Follows the evaluation algorithm of the OSV schema: the events of a
    range are walked in version order, and the version is vulnerable after
    an ``introduced`` event it has reached, until a ``fixed`` event it has
    reached (exclusive bound) or a ``last_affected`` event it has passed
    (inclusive bound). A range may therefore contain several
    introduced/fixed windows.

    Args:
        ver_str: Version of the dependency, parsed with
            ``packaging.version.Version``.
        ranges_list: The ``ranges`` list of an OSV ``affected`` entry.
            Only ranges of type "SEMVER" or "ECOSYSTEM" are evaluated.

    Returns:
        True if any range marks the version as vulnerable. False if none
        does, or if ``ver_str`` itself cannot be parsed.

    Unparseable bounds are handled conservatively. An unparseable
    ``introduced`` is treated as version 0. An unparseable ``fixed`` or
    ``last_affected`` is ignored, so the range stays open-ended.
    """
    try:
        ver = Version(ver_str)
    except InvalidVersion:
        return False

    for r in ranges_list:
        if r.get("type", "") not in ("SEMVER", "ECOSYSTEM"):
            continue

        events = []
        has_introduced = False
        for event in r.get("events", []):
            if "introduced" in event:
                kind = "introduced"
            elif "fixed" in event:
                kind = "fixed"
            elif "last_affected" in event:
                kind = "last_affected"
            else:
                continue
            try:
                bound = Version(event[kind])
            except (InvalidVersion, TypeError):
                if kind != "introduced":
                    # Can't parse the upper bound: assume still vulnerable.
                    continue
                bound = Version("0")
            if kind == "introduced":
                has_introduced = True
            events.append((bound, kind))

        if not has_introduced:
            continue

        # Stable sort keeps the record's order for events on the same version.
        events.sort(key=lambda item: item[0])

        vulnerable = False
        for bound, kind in events:
            if kind == "introduced":
                if ver >= bound:
                    vulnerable = True
            elif kind == "fixed":
                if ver >= bound:
                    vulnerable = False
            elif ver > bound:  # last_affected is inclusive
                vulnerable = False
        if vulnerable:
            return True
    return False
218
+
219
+
220
def check_staleness(missing_criticals=None):
    """Check intel cache staleness from ``<INTEL_DIR>/manifest.json``.

    Args:
        missing_criticals: Unused; kept for interface compatibility.

    Returns:
        A list of ``intel_stale`` finding dicts (keys ``category``,
        ``source``, ``severity``, ``description``), one per stale source:

        - never synced (empty or "never" ``last_sync``): critical
        - 90+ days old: critical (scan can't be trusted)
        - 30+ days old: high

        An empty list is returned when the manifest is missing,
        unreadable, or not a JSON object. Sources whose ``last_sync``
        cannot be parsed as an ISO timestamp are skipped. Entries that
        are not objects are skipped; entries without a ``name`` are
        reported as "unknown" instead of raising ``KeyError``.
    """
    import datetime
    findings = []
    manifest_path = os.path.join(INTEL_DIR, "manifest.json")

    if not os.path.exists(manifest_path):
        return findings

    try:
        with open(manifest_path) as f:
            manifest = json.load(f)
    except (json.JSONDecodeError, OSError):
        return findings
    if not isinstance(manifest, dict):
        return findings

    now = datetime.datetime.now(datetime.timezone.utc)

    # NOTE(review): assumes "sources" is a list of objects - confirm
    # against the manifest writer.
    for src in manifest.get("sources", []):
        if not isinstance(src, dict):
            continue
        name = src.get("name", "unknown")
        last_sync = src.get("last_sync", "")
        if not last_sync or last_sync == "never":
            findings.append({
                "category": "intel_stale",
                "source": name,
                "severity": "critical",
                "description": f"Intel source {name} has never been synced. Run: clawsec sync {name}"
            })
            continue

        try:
            # fromisoformat() on older Pythons rejects a trailing "Z".
            ts = last_sync.replace("Z", "+00:00")
            sync_dt = datetime.datetime.fromisoformat(ts)
            if sync_dt.tzinfo is None:
                # Naive timestamps are interpreted as UTC.
                sync_dt = sync_dt.replace(tzinfo=datetime.timezone.utc)
            age_days = (now - sync_dt).days
        except (ValueError, AttributeError):
            continue

        if age_days >= 90:
            findings.append({
                "category": "intel_stale",
                "source": name,
                "severity": "critical",
                "description": f"Intel source {name} is {age_days} days old (>= 90 days). Scan results cannot be trusted. Run: clawsec sync {name}"
            })
        elif age_days >= 30:
            findings.append({
                "category": "intel_stale",
                "source": name,
                "severity": "high",
                "description": f"Intel source {name} is {age_days} days old (>= 30 days). Results may be outdated. Run: clawsec sync {name}"
            })

    return findings
279
+
280
+
281
def check_intel_cache():
    """Report which required intel sources are absent from INTEL_DIR.

    Returns:
        A list of missing source labels, drawn from "cisa_kev", "osv" and
        "epss" in that order. When INTEL_DIR itself is not a directory
        the list is ``["intel_cache_dir", "cisa_kev", "osv", "epss"]``.
        An empty list means everything required is present.
    """
    if not os.path.isdir(INTEL_DIR):
        return ["intel_cache_dir", "cisa_kev", "osv", "epss"]

    # (label, existence probe, path components below INTEL_DIR)
    required = (
        ("cisa_kev", os.path.exists, ("cisa-kev", "known_exploited_vulnerabilities.json")),
        ("osv", os.path.isdir, ("osv",)),
        ("epss", os.path.exists, ("epss", "epss_scores-current.csv")),
    )
    return [
        label
        for label, probe, parts in required
        if not probe(os.path.join(INTEL_DIR, *parts))
    ]
293
+
294
def _flatten_osv_advisory(adv):
    """Expand one parsed OSV advisory into one record per affected package."""
    cve_ids = [a for a in adv.get("aliases", []) if a.startswith("CVE-")]
    severity = adv.get("database_specific", {}).get("severity", "")
    records = []
    for affected in adv.get("affected", []):
        pkg = affected.get("package", {})
        name = pkg.get("name", "")
        if not name:
            continue
        records.append({
            "id": adv.get("id", ""),
            "summary": adv.get("summary", ""),
            "cve_ids": list(cve_ids),
            "package": name,
            "ecosystem": pkg.get("ecosystem", ""),
            "ranges": affected.get("ranges", []),
            "versions": affected.get("versions", []),
            "severity": severity,
        })
    return records


def check_dependencies(skill_path):
    """Main check: match deps against OSV, flag KEV, rank by EPSS.

    Args:
        skill_path: Path to the skill directory to scan.

    Returns:
        A result dict with keys ``check`` ("dependency_scan"), ``status``
        ("pass", "warn" or "fail"), ``findings`` and ``errors``, plus
        ``note`` when no dependencies were declared.

        Status rules:

        - "fail" when the intel cache directory is missing, when any intel
          source is critically stale, or when a finding is in CISA KEV.
          A staleness "fail" is never downgraded by later findings.
        - "warn" when there is any other finding.
        - "pass" otherwise, including when the skill declares no
          dependencies.

        Vulnerability findings get severity "critical" when in KEV,
        "high" for EPSS probability > 0.02, "medium" for > 0.001, and
        "low" otherwise.
    """
    results = {
        "check": "dependency_scan",
        "status": "pass",
        "findings": [],
        "errors": []
    }

    # Validate intel cache before proceeding
    missing = check_intel_cache()
    for source in missing:
        results["findings"].append({
            "category": "intel_missing",
            "severity": "critical",
            "description": f"Required intel source {source} is missing or corrupt. Results may be incomplete."
        })
    if not os.path.isdir(INTEL_DIR):
        results["status"] = "fail"
        results["errors"].append("intel cache directory missing")
        return results

    # P1-3: Check staleness of intel sources
    staleness = check_staleness()
    results["findings"].extend(staleness)
    # If any source is critically stale, the verdict is fail.
    intel_untrusted = any(s["severity"] == "critical" for s in staleness)
    if intel_untrusted:
        results["status"] = "fail"
        results["errors"].append("Intel sources critically stale (>= 90 days). Scan results unreliable.")

    deps = parse_skill_deps(skill_path)
    if not deps:
        # Nothing to match against the intel, so this check passes; any
        # staleness/missing findings remain in the report.
        results["status"] = "pass"
        results["note"] = "No declared dependencies found"
        return results

    kev = load_cisa_kev()
    epss = load_epss()

    # Only load OSV ecosystems for which we found dependencies
    # This prevents cross-matching (e.g., npm "requests" matching PyPI advisory)
    ecosystems_found = {d["ecosystem"] for d in deps if d["ecosystem"] in ("npm", "PyPI")}
    osv_advisories = {}

    for eco in ecosystems_found:
        eco_dir = os.path.join(INTEL_DIR, "osv", eco)
        index_path = os.path.join(eco_dir, "index.json")

        if not os.path.exists(index_path):
            # No index, fall back to full scan
            osv_advisories[eco] = load_osv_ecosystem(eco)
            continue

        try:
            with open(index_path) as f:
                index = json.load(f)
        except (json.JSONDecodeError, OSError):
            osv_advisories[eco] = load_osv_ecosystem(eco)
            continue

        # Fast path: collect advisory files only for this ecosystem's deps.
        needed_fnames = set()
        for dep in deps:
            if dep["ecosystem"] != eco:
                continue
            key = dep["name"].lower()
            if key in index:
                needed_fnames.update(index[key])

        eco_advisories = []
        for fname in needed_fnames:
            fpath = os.path.join(eco_dir, fname)
            try:
                with open(fpath) as f:
                    adv = json.load(f)
            except (json.JSONDecodeError, KeyError, OSError):
                continue
            eco_advisories.extend(_flatten_osv_advisory(adv))
        osv_advisories[eco] = eco_advisories

    for dep in deps:
        name = dep["name"]
        # Only check OSV advisories from the same ecosystem
        candidates = osv_advisories.get(dep["ecosystem"])
        if not candidates:
            continue

        # Strip range operators (^, ~, >=, ...) and treat the remainder as
        # the version. Non-string versions cannot be evaluated.
        raw_ver = dep.get("version", "")
        ver = raw_ver.lstrip("^~>=<!").strip() if isinstance(raw_ver, str) else ""
        if not ver:
            continue

        wanted = unicodedata.normalize('NFKC', name.lower())
        for adv in candidates:
            if unicodedata.normalize('NFKC', adv["package"].lower()) != wanted:
                continue

            # Check version against both explicit versions list and ranges
            is_vulnerable = False
            if adv.get("versions") and ver in adv["versions"]:
                is_vulnerable = True
            elif adv.get("ranges") and version_in_range(ver, adv["ranges"]):
                is_vulnerable = True
            if not is_vulnerable:
                continue

            finding = {
                "package": name,
                "version": ver,
                "advisory": adv["id"],
                "summary": adv.get("summary", ""),
            }
            cve_ids = adv.get("cve_ids", [])
            # Check KEV: first alias present in the catalog wins.
            for cve in cve_ids:
                if cve in kev:
                    finding["in_kev"] = True
                    finding["cve"] = cve
                    break
            # Check EPSS: first alias with a score wins.
            for cve in cve_ids:
                if cve in epss:
                    prob, pct = epss[cve]
                    finding["epss_probability"] = prob
                    finding["epss_percentile"] = pct
                    break

            if finding.get("in_kev"):
                finding["severity"] = "critical"
            elif finding.get("epss_probability", 0) > 0.02:
                finding["severity"] = "high"
            elif finding.get("epss_probability", 0) > 0.001:
                finding["severity"] = "medium"
            else:
                finding["severity"] = "low"

            results["findings"].append(finding)

    # Determine status. The staleness "fail" is sticky: the stale findings
    # themselves must not downgrade the verdict to "warn".
    if intel_untrusted or any(f.get("in_kev") for f in results["findings"]):
        results["status"] = "fail"
    elif results["findings"]:
        results["status"] = "warn"

    return results
450
+
451
if __name__ == "__main__":
    # CLI entry: scan the skill directory given as the first argument and
    # print the result dict as indented JSON.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: dep-scan.py <skill_path>")
        sys.exit(1)
    print(json.dumps(check_dependencies(cli_args[0]), indent=2))