skilllite 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,420 @@
1
+ """
2
+ Security scanning module for SkillLite core.
3
+
4
+ Provides SecurityScanResult and security scanning utilities that can be used
5
+ by SkillRunner, AgenticLoop, and adapters.
6
+
7
+ Usage:
8
+ from skilllite.core.security import SecurityScanner, SecurityScanResult
9
+
10
+ scanner = SecurityScanner()
11
+ result = scanner.scan_skill(skill_info, input_data)
12
+
13
+ if result.requires_confirmation:
14
+ # Ask user for confirmation
15
+ if confirmation_callback(result.format_report(), result.scan_id):
16
+ # User confirmed, proceed with execution
17
+ pass
18
+ """
19
+
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING
23
+ import hashlib
24
+ import subprocess
25
+ import time
26
+ import uuid
27
+
28
+ if TYPE_CHECKING:
29
+ from .skill_info import SkillInfo
30
+
31
+
32
# Type alias for the user-confirmation callback.
# Signature: (security_report: str, scan_id: str) -> bool
# The module usage example treats a truthy return value as "user confirmed,
# proceed with execution".
ConfirmationCallback = Callable[[str, str], bool]
35
+
36
+
37
@dataclass
class SecurityScanResult:
    """Outcome of a single security scan.

    The severity counters may be passed explicitly. When they are all left at
    zero but ``issues`` is non-empty, they are derived from the ``severity``
    field of each issue so that ``requires_confirmation`` and the report
    footer cannot silently disagree with the issue list.
    """

    # Verdict supplied by whoever builds the result; not recomputed here.
    is_safe: bool
    # One dict per finding. Keys read by this class: "severity", "issue_type",
    # "rule_id", "line_number", "description", "code_snippet".
    issues: List[Dict[str, Any]] = field(default_factory=list)
    # Identifier of the scan; only the first 8 characters appear in reports.
    scan_id: str = ""
    # Opaque hash string supplied by the creator of the result.
    code_hash: str = ""
    # Number of issues whose severity is "Critical" or "High".
    high_severity_count: int = 0
    # Number of issues whose severity is "Medium".
    medium_severity_count: int = 0
    # Number of issues whose severity is "Low".
    low_severity_count: int = 0
    # Creation time in seconds since the epoch (time.time()).
    timestamp: float = field(default_factory=time.time)

    def __post_init__(self) -> None:
        """Derive severity counters from ``issues`` when none were supplied."""
        counts_supplied = (
            self.high_severity_count
            or self.medium_severity_count
            or self.low_severity_count
        )
        if counts_supplied or not self.issues:
            return

        severities = [issue.get("severity") for issue in self.issues]
        self.high_severity_count = sum(
            1 for sev in severities if sev in ("Critical", "High")
        )
        self.medium_severity_count = sum(1 for sev in severities if sev == "Medium")
        self.low_severity_count = sum(1 for sev in severities if sev == "Low")

    @property
    def requires_confirmation(self) -> bool:
        """Return True when at least one Critical/High issue was counted."""
        return self.high_severity_count > 0

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict.

        ``timestamp`` is intentionally not part of the output; the derived
        ``requires_confirmation`` flag is.
        """
        return {
            "is_safe": self.is_safe,
            "issues": self.issues,
            "scan_id": self.scan_id,
            "code_hash": self.code_hash,
            "high_severity_count": self.high_severity_count,
            "medium_severity_count": self.medium_severity_count,
            "low_severity_count": self.low_severity_count,
            "requires_confirmation": self.requires_confirmation,
        }

    def format_report(self) -> str:
        """Format a human-readable, multi-line security report.

        Returns:
            A one-line "passed" message when there are no issues, otherwise a
            header, one four-line entry per issue and a footer stating whether
            confirmation is required.
        """
        if not self.issues:
            return "✅ Security scan passed. No issues found."

        lines = [
            f"📋 Security Scan Report (ID: {self.scan_id[:8]})",
            f" Found {len(self.issues)} item(s) for review:",
            "",
        ]

        severity_icons = {
            "Critical": "🔴",
            "High": "🟠",
            "Medium": "🟡",
            "Low": "🟢",
        }

        for idx, issue in enumerate(self.issues, 1):
            severity = issue.get("severity", "Medium")
            icon = severity_icons.get(severity, "⚪")
            lines.append(f" {icon} #{idx} [{severity}] {issue.get('issue_type', 'Unknown')}")
            lines.append(f" ├─ Rule: {issue.get('rule_id', 'N/A')}")
            lines.append(f" ├─ Line {issue.get('line_number', '?')}: {issue.get('description', '')}")
            # A snippet of None (or any non-string) must not break slicing.
            snippet = str(issue.get("code_snippet") or "")
            lines.append(f" └─ Code: {snippet[:60]}{'...' if len(snippet) > 60 else ''}")
            lines.append("")

        if self.high_severity_count > 0:
            lines.append("⚠️ High severity issues found. Confirmation required to execute.")
        else:
            lines.append("ℹ️ Only low/medium severity issues found. Safe to execute.")

        return "\n".join(lines)
101
+
102
+
103
class SecurityScanner:
    """
    Security scanner for skill execution.

    Runs the skillbox binary (``<skillbox> security-scan <file>``) to perform
    static code analysis before execution and parses its textual output.

    The scanner is fail-open: when no entry script or skillbox binary is
    available, or when running the scan raises, a result with
    ``is_safe=True`` and no issues is returned. Only results produced by an
    actual scan are stored in the cache used by ``get_cached_scan`` and
    ``verify_scan``.

    This class supports singleton pattern for shared instance across
    all entry points (AgenticLoop, LangChain, LlamaIndex, MCP).
    """

    _instance: Optional["SecurityScanner"] = None

    # Severity labels recognised in scanner output, checked in this order.
    _SEVERITIES = ("Critical", "High", "Medium", "Low")

    @classmethod
    def get_instance(cls) -> "SecurityScanner":
        """Return the shared scanner, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def reset_instance(cls) -> None:
        """Drop the shared scanner so the next get_instance() builds a new one."""
        cls._instance = None

    def __init__(self, skillbox_path: Optional[str] = None):
        """
        Initialize the security scanner.

        Args:
            skillbox_path: Path to skillbox binary. If None, it is looked up
                lazily the first time ``skillbox_path`` is read.
        """
        self._skillbox_path = skillbox_path
        self._scan_cache: Dict[str, "SecurityScanResult"] = {}
        self._SCAN_CACHE_TTL = 300  # seconds (5 minutes)

    @property
    def skillbox_path(self) -> Optional[str]:
        """Return the skillbox binary path, resolving it lazily if unset."""
        if self._skillbox_path is None:
            try:
                from ..sandbox.skillbox import find_binary
                self._skillbox_path = find_binary()
            except Exception:
                # Deliberate best effort: a missing or broken lookup leaves
                # the path as None, which callers treat as "scanner absent".
                pass
        return self._skillbox_path

    def _generate_input_hash(self, skill_name: str, input_data: Dict[str, Any]) -> str:
        """Return a 16-hex-char SHA-256 prefix of the skill name and input.

        Keys are sorted so equal dicts hash equally regardless of insertion
        order. Values that JSON cannot encode are hashed via their ``repr``
        instead of raising.
        """
        import json

        serialized = json.dumps(
            input_data, sort_keys=True, ensure_ascii=False, default=repr
        )
        content = f"{skill_name}:{serialized}"
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    def _cleanup_expired_scans(self) -> None:
        """Remove cached results older than the cache TTL (in place)."""
        current_time = time.time()
        expired_keys = [
            key for key, cached in self._scan_cache.items()
            if current_time - cached.timestamp > self._SCAN_CACHE_TTL
        ]
        for key in expired_keys:
            del self._scan_cache[key]

    @staticmethod
    def _extract_line_number(text: str) -> Optional[int]:
        """Return a line number mentioned in *text*, or None.

        Only text containing the word "line" (any case) is considered. A
        colon-delimited part made purely of digits wins (``line: 12``);
        otherwise a number directly following the word is used (``Line 12:``).
        """
        import re

        if "line" not in text.lower():
            return None
        for part in text.split(":"):
            token = part.strip()
            # isdecimal() only accepts characters int() can parse.
            if token.isdecimal():
                return int(token)
        match = re.search(r"\bline\s*#?\s*(\d+)", text, re.IGNORECASE)
        return int(match.group(1)) if match else None

    def _parse_scan_output(self, output: str) -> List[Dict[str, Any]]:
        """Parse skillbox scan output into structured issues.

        A line containing ``[Critical]``, ``[High]``, ``[Medium]`` or
        ``[Low]`` starts a new issue and becomes its description. Every
        following non-empty line is appended to that issue's code snippet and
        may update its line number. Lines before the first marker are ignored.
        """
        issues: List[Dict[str, Any]] = []
        current_issue: Optional[Dict[str, Any]] = None

        for raw_line in output.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            severity = next(
                (sev for sev in self._SEVERITIES if f"[{sev}]" in line), None
            )
            if severity is not None:
                if current_issue:
                    issues.append(current_issue)
                current_issue = {
                    "severity": severity,
                    "issue_type": "SecurityIssue",
                    "description": line,
                    "rule_id": "unknown",
                    "line_number": 0,
                    "code_snippet": "",
                }
                continue

            if current_issue is None:
                continue

            line_number = self._extract_line_number(line)
            if line_number is not None:
                current_issue["line_number"] = line_number

            # Detail lines accumulate in the snippet, space separated.
            if current_issue["code_snippet"]:
                current_issue["code_snippet"] += " " + line
            else:
                current_issue["code_snippet"] = line

        if current_issue:
            issues.append(current_issue)

        return issues

    def _safe_result(self, scan_id: str, code_hash: str) -> "SecurityScanResult":
        """Build the uncached "nothing found" result used when no scan ran."""
        return SecurityScanResult(
            is_safe=True,
            issues=[],
            scan_id=scan_id,
            code_hash=code_hash,
        )

    def _run_scan(self, target_path: str, scan_id: str, code_hash: str) -> "SecurityScanResult":
        """Run skillbox on *target_path*, cache the result and return it.

        stdout and stderr are parsed together; the exit status is not
        inspected. Exceptions from the subprocess propagate to the caller.
        """
        completed = subprocess.run(
            [self.skillbox_path, "security-scan", target_path],
            capture_output=True,
            text=True,
            # Undecodable bytes must not abort the scan: the callers treat
            # any exception as "safe".
            errors="replace",
            timeout=30,
        )

        issues = self._parse_scan_output(completed.stdout + completed.stderr)
        high_count = sum(1 for i in issues if i.get("severity") in ["Critical", "High"])
        medium_count = sum(1 for i in issues if i.get("severity") == "Medium")
        low_count = sum(1 for i in issues if i.get("severity") == "Low")

        scan_result = SecurityScanResult(
            is_safe=high_count == 0,
            issues=issues,
            scan_id=scan_id,
            code_hash=code_hash,
            high_severity_count=high_count,
            medium_severity_count=medium_count,
            low_severity_count=low_count,
        )
        self._scan_cache[scan_id] = scan_result
        return scan_result

    def _resolve_entry_script(self, skill_info: "SkillInfo", entry_point: Optional[str]) -> Optional[Path]:
        """Pick the script to scan: explicit, from metadata, or a default.

        Returns None only when no explicit or metadata entry point is given
        and neither default location exists. Explicit and metadata paths are
        returned without checking that they exist.
        """
        if entry_point:
            return skill_info.path / entry_point
        if skill_info.metadata and skill_info.metadata.entry_point:
            return skill_info.path / skill_info.metadata.entry_point
        for default_entry in ("scripts/main.py", "main.py"):
            candidate = skill_info.path / default_entry
            if candidate.exists():
                return candidate
        return None

    def scan_skill(
        self,
        skill_info: "SkillInfo",
        input_data: Dict[str, Any],
        entry_point: Optional[str] = None
    ) -> "SecurityScanResult":
        """
        Perform a security scan on a skill before execution.

        Args:
            skill_info: SkillInfo object for the skill
            input_data: Input data for the skill execution
            entry_point: Optional specific entry point script

        Returns:
            SecurityScanResult with any issues found. Its ``code_hash`` is the
            hash of the skill name and input data. A safe, uncached result is
            returned when there is no entry script, no skillbox binary, or the
            scan raises.
        """
        self._cleanup_expired_scans()

        input_hash = self._generate_input_hash(skill_info.name, input_data)
        scan_id = str(uuid.uuid4())

        entry_script = self._resolve_entry_script(skill_info, entry_point)
        if entry_script is None or not entry_script.exists():
            return self._safe_result(scan_id, input_hash)
        if not self.skillbox_path:
            return self._safe_result(scan_id, input_hash)

        try:
            return self._run_scan(str(entry_script), scan_id, input_hash)
        except Exception:
            # Fail-open by design: a broken scanner must not block execution.
            return self._safe_result(scan_id, input_hash)

    def scan_code(
        self,
        language: str,
        code: str,
        sandbox_level: int = 3
    ) -> "SecurityScanResult":
        """
        Perform a security scan on arbitrary code.

        This is used by MCP server to scan code before execution.

        Args:
            language: Programming language (python, javascript, etc.)
            code: Code to scan
            sandbox_level: Sandbox level (1, 2, or 3); levels below 3 skip
                the scan.

        Returns:
            SecurityScanResult with any issues found. A safe, uncached result
            is returned when the scan is skipped, no skillbox binary is
            available, or the scan raises.
        """
        import os
        import tempfile

        self._cleanup_expired_scans()

        scan_id = str(uuid.uuid4())
        code_hash = hashlib.sha256(code.encode()).hexdigest()[:16]

        if sandbox_level < 3 or not self.skillbox_path:
            return self._safe_result(scan_id, code_hash)

        ext_map = {
            "python": ".py",
            "py": ".py",
            "javascript": ".js",
            "js": ".js",
            "bash": ".sh",
            "shell": ".sh",
        }
        ext = ext_map.get(language.lower(), ".txt")

        temp_path: Optional[str] = None
        try:
            # UTF-8 so that source the locale encoding cannot represent still
            # reaches the scanner instead of raising (and being waved through).
            with tempfile.NamedTemporaryFile(
                mode='w',
                suffix=ext,
                delete=False,
                encoding='utf-8',
            ) as f:
                temp_path = f.name  # recorded first so a failed write is cleaned up
                f.write(code)

            return self._run_scan(temp_path, scan_id, code_hash)
        except Exception:
            # Fail-open by design: a broken scanner must not block execution.
            return self._safe_result(scan_id, code_hash)
        finally:
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError:
                    # A cleanup failure must not replace the scan result.
                    pass

    def get_cached_scan(self, scan_id: str) -> Optional["SecurityScanResult"]:
        """Return the cached result for *scan_id*, or None if absent/expired."""
        self._cleanup_expired_scans()
        return self._scan_cache.get(scan_id)

    def verify_scan(self, scan_id: str, code_hash: str) -> bool:
        """Return True when a cached scan exists and its hash equals *code_hash*."""
        cached = self.get_cached_scan(scan_id)
        if cached is None:
            return False
        return cached.code_hash == code_hash
420
+