cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,1105 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Static pattern analyzer for detecting security vulnerabilities.
19
+ """
20
+
21
+ import hashlib
22
+ import logging
23
+ import re
24
+ from pathlib import Path
25
+ from typing import Any
26
+
27
+ from ...core.models import Finding, Severity, Skill, ThreatCategory
28
+ from ...core.rules.patterns import RuleLoader, SecurityRule
29
+ from ...core.rules.yara_scanner import YaraScanner
30
+ from ...threats.threats import ThreatMapping
31
+ from .base import BaseAnalyzer
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
# Regular expressions compiled once at import time. The analyzer searches
# script sources with them to decide which kinds of operation a script performs.

# A match is treated as evidence that a script reads files.
_READ_PATTERNS = [
    re.compile(source)
    for source in (
        r"open\([^)]+['\"]r['\"]",
        r"open\([^)]+\)",
        r"\.read\(",
        r"\.readline\(",
        r"\.readlines\(",
        r"Path\([^)]+\)\.read_text",
        r"Path\([^)]+\)\.read_bytes",
        r"with\s+open\([^)]+['\"]r",
    )
]

# A match is treated as evidence that a script writes files.
_WRITE_PATTERNS = [
    re.compile(source)
    for source in (
        r"open\([^)]+['\"]w['\"]",
        r"\.write\(",
        r"\.writelines\(",
        r"pathlib\.Path\([^)]+\)\.write",
        r"with\s+open\([^)]+['\"]w",
    )
]

# Regex-search / grep style operations.
_GREP_PATTERNS = [
    re.compile(source)
    for source in (
        r"re\.search\(",
        r"re\.findall\(",
        r"re\.match\(",
        r"re\.finditer\(",
        r"re\.sub\(",
        r"\.search\(",
        r"\.findall\(",
        r"grep",
    )
]

# Glob / filename-pattern operations.
_GLOB_PATTERNS = [
    re.compile(source)
    for source in (
        r"glob\.glob\(",
        r"glob\.iglob\(",
        r"Path\([^)]*\)\.glob\(",
        r"\.glob\(",
        r"\.rglob\(",
        r"fnmatch\.",
    )
]

# Constructs that let a loop terminate: exception handlers, a bare
# ``break``/``return`` at end of line, ``sys.exit(...)`` and ``raise StopIteration``.
# Flags of 0 are equivalent to passing no flags at all.
_EXCEPTION_PATTERNS = [
    re.compile(source, flags)
    for source, flags in (
        (r"except\s+(EOFError|StopIteration|KeyboardInterrupt|Exception|BaseException)", 0),
        (r"except\s*:", 0),
        (r"break\s*$", re.MULTILINE),
        (r"return\s*$", re.MULTILINE),
        (r"sys\.exit\s*\(", 0),
        (r"raise\s+StopIteration", 0),
    )
]

# Characters allowed in a skill name (used with fullmatch).
_SKILL_NAME_PATTERN = re.compile(r"[a-z0-9-]+")
# ``[text](target)`` markdown links; groups are (text, target).
_MARKDOWN_LINK_PATTERN = re.compile(r"\[([^\]]+)\]\(([^\)]+)\)")
# ``from .module import ...`` at the start of a line; the group is what follows the first dot.
_PYTHON_IMPORT_PATTERN = re.compile(r"^from\s+\.([A-Za-z0-9_.]*)\s+import", re.MULTILINE)
# ``source file.sh`` / ``. file.bash``; the group is the script path.
_BASH_SOURCE_PATTERN = re.compile(r"(?:source|\.)\s+([A-Za-z0-9_\-./]+\.(?:sh|bash))")
# ``rm -r... target``; the group is the first target after the options.
_RM_TARGET_PATTERN = re.compile(r"rm\s+-r[^;]*?\s+([^\s;]+)")
89
+
90
+
91
+ class StaticAnalyzer(BaseAnalyzer):
92
+ """Static pattern-based security analyzer."""
93
+
94
def __init__(self, rules_file: Path | None = None, use_yara: bool = True):
    """
    Set up the rule loader and, when requested, the YARA scanner.

    Args:
        rules_file: Optional custom rules file, passed straight to RuleLoader
        use_yara: Whether to try to create a YaraScanner (default: True)
    """
    super().__init__("static_analyzer")

    self.rule_loader = RuleLoader(rules_file)
    self.rule_loader.load_rules()

    self.use_yara = use_yara
    self.yara_scanner = None
    if not use_yara:
        return

    # YARA is optional: if the scanner cannot be created, warn and carry on
    # with yara_scanner left as None.
    try:
        self.yara_scanner = YaraScanner()
    except Exception as e:
        logger.warning("Could not load YARA scanner: %s", e)
115
+
116
def analyze(self, skill: Skill) -> list[Finding]:
    """
    Run every static check against a skill and collect the findings.

    Passes run in this order, and findings are returned in the same order:
    1. Manifest validation
    2. Instruction body scanning (SKILL.md)
    3. Script/code scanning
    4. Consistency checks
    5. Referenced file scanning
    6. Binary file checks
    7. YARA scanning (only when a YARA scanner is available)
    8. Asset file scanning

    Args:
        skill: Skill to analyze

    Returns:
        List of security findings
    """
    findings = []

    always_run = (
        self._check_manifest,
        self._scan_instruction_body,
        self._scan_scripts,
        self._check_consistency,
        self._scan_referenced_files,
        self._check_binary_files,
    )
    for run_pass in always_run:
        findings += run_pass(skill)

    # yara_scanner is None when YARA was disabled or could not be loaded.
    if self.yara_scanner:
        findings += self._yara_scan(skill)

    findings += self._scan_asset_files(skill)
    return findings
148
+
149
def _check_manifest(self, skill: Skill) -> list[Finding]:
    """
    Validate the skill manifest for policy and social-engineering issues.

    Checks, in order: name format and length, description too long,
    description too short, "anthropic" mentioned without a brand-guideline
    context, "claude official" claims, and a missing license.

    A missing (None) name or description is treated as an empty string, so it
    is reported as an invalid name / vague description instead of raising
    TypeError.

    Args:
        skill: Skill whose manifest is validated

    Returns:
        List of findings, one per failed check
    """
    findings = []
    manifest = skill.manifest

    # Normalise once so every check below tolerates absent fields.
    name = manifest.name or ""
    description = manifest.description or ""

    if len(name) > 64 or not _SKILL_NAME_PATTERN.fullmatch(name):
        findings.append(
            Finding(
                id=self._generate_finding_id("MANIFEST_INVALID_NAME", "manifest"),
                rule_id="MANIFEST_INVALID_NAME",
                category=ThreatCategory.POLICY_VIOLATION,
                severity=Severity.LOW,
                title="Skill name does not follow Claude Skills naming rules",
                description=(
                    f"Skill name '{manifest.name}' is invalid. Claude Skills require lowercase letters, numbers, "
                    f"and hyphens only, with a maximum length of 64 characters."
                ),
                file_path="SKILL.md",
                remediation="Rename the skill to match `[a-z0-9-]{1,64}` (e.g., 'pdf-processing')",
                analyzer="static",
            )
        )

    if len(description) > 1024:
        findings.append(
            Finding(
                id=self._generate_finding_id("MANIFEST_DESCRIPTION_TOO_LONG", "manifest"),
                rule_id="MANIFEST_DESCRIPTION_TOO_LONG",
                category=ThreatCategory.POLICY_VIOLATION,
                severity=Severity.LOW,
                title="Skill description exceeds Claude Skills length limit",
                description=(
                    f"Skill description is {len(description)} characters; Claude Skills limit the "
                    f"`description` field to 1024 characters."
                ),
                file_path="SKILL.md",
                remediation="Shorten the description to 1024 characters or fewer while keeping it specific",
                analyzer="static",
            )
        )

    if len(description) < 20:
        findings.append(
            Finding(
                id=self._generate_finding_id("SOCIAL_ENG_VAGUE_DESCRIPTION", "manifest"),
                rule_id="SOCIAL_ENG_VAGUE_DESCRIPTION",
                category=ThreatCategory.SOCIAL_ENGINEERING,
                severity=Severity.LOW,
                title="Vague skill description",
                description=f"Skill description is too short ({len(description)} chars). Provide detailed explanation.",
                file_path="SKILL.md",
                remediation="Provide a clear, detailed description of what the skill does and when to use it",
                analyzer="static",
            )
        )

    description_lower = description.lower()
    name_lower = name.lower()
    is_anthropic_mentioned = "anthropic" in name_lower or "anthropic" in description_lower

    if is_anthropic_mentioned:
        # Descriptions containing any of these words are not flagged.
        legitimate_patterns = ["apply", "brand", "guidelines", "colors", "typography", "style"]
        is_legitimate = any(pattern in description_lower for pattern in legitimate_patterns)

        if not is_legitimate:
            findings.append(
                Finding(
                    id=self._generate_finding_id("SOCIAL_ENG_ANTHROPIC_IMPERSONATION", "manifest"),
                    rule_id="SOCIAL_ENG_ANTHROPIC_IMPERSONATION",
                    category=ThreatCategory.SOCIAL_ENGINEERING,
                    severity=Severity.MEDIUM,
                    title="Potential Anthropic brand impersonation",
                    description="Skill name or description contains 'Anthropic', suggesting official affiliation",
                    file_path="SKILL.md",
                    remediation="Do not impersonate official Anthropic skills or use Anthropic branding without authorization",
                    analyzer="static",
                )
            )

    if "claude official" in name_lower or "claude official" in description_lower:
        findings.append(
            Finding(
                id=self._generate_finding_id("SOCIAL_ENG_CLAUDE_OFFICIAL", "manifest"),
                rule_id="SOCIAL_ENG_ANTHROPIC_IMPERSONATION",
                category=ThreatCategory.SOCIAL_ENGINEERING,
                severity=Severity.HIGH,
                title="Claims to be official Claude skill",
                description="Skill claims to be an 'official' Claude skill",
                file_path="SKILL.md",
                remediation="Remove 'official' claims unless authorized by Anthropic",
                analyzer="static",
            )
        )

    if not manifest.license:
        findings.append(
            Finding(
                id=self._generate_finding_id("MANIFEST_MISSING_LICENSE", "manifest"),
                rule_id="MANIFEST_MISSING_LICENSE",
                category=ThreatCategory.POLICY_VIOLATION,
                severity=Severity.LOW,
                title="Skill does not specify a license",
                description="Skill manifest does not include a 'license' field. Specifying a license helps users understand usage terms.",
                file_path="SKILL.md",
                remediation="Add 'license' field to SKILL.md frontmatter (e.g., MIT, Apache-2.0)",
                analyzer="static",
            )
        )

    return findings
259
+
260
def _scan_instruction_body(self, skill: Skill) -> list[Finding]:
    """Run every markdown rule over the SKILL.md instruction body and return one finding per match."""
    return [
        self._create_finding_from_match(rule, match)
        for rule in self.rule_loader.get_rules_for_file_type("markdown")
        for match in rule.scan_content(skill.instruction_body, "SKILL.md")
    ]
272
+
273
def _scan_scripts(self, skill: Skill) -> list[Finding]:
    """
    Apply the language-specific rules to every Python and Bash file of the skill.

    Files of any other type, and files whose content is empty, are skipped.
    For Python files, matches of the RESOURCE_ABUSE_INFINITE_LOOP rule are
    dropped when _is_loop_with_exception_handler() accepts the loop.

    Args:
        skill: Skill whose files are scanned

    Returns:
        List of findings, in file / rule / match order
    """
    findings = []

    for script in skill.files:
        kind = script.file_type
        if kind != "python" and kind != "bash":
            continue

        rules = self.rule_loader.get_rules_for_file_type(kind)

        source = script.read_content()
        if not source:
            continue

        is_python = kind == "python"
        for rule in rules:
            for match in rule.scan_content(source, script.relative_path):
                suppressed = (
                    rule.id == "RESOURCE_ABUSE_INFINITE_LOOP"
                    and is_python
                    and self._is_loop_with_exception_handler(source, match["line_number"])
                )
                if not suppressed:
                    findings.append(self._create_finding_from_match(rule, match))

    return findings
296
+
297
def _is_loop_with_exception_handler(self, content: str, loop_line_num: int) -> bool:
    """
    Report whether the code following a loop contains an exit or handler construct.

    The window searched starts at line ``loop_line_num`` (used as a 1-based
    index into the lines of ``content``) and covers that line plus up to 20
    following lines. Any match of _EXCEPTION_PATTERNS inside the window counts.

    Args:
        content: Full source text of the file
        loop_line_num: Line number of the loop

    Returns:
        True if any pattern matches inside the window, False otherwise
    """
    lines = content.split("\n")
    window_end = min(loop_line_num + 20, len(lines))
    window = "\n".join(lines[loop_line_num - 1 : window_end])
    return any(pattern.search(window) for pattern in _EXCEPTION_PATTERNS)
308
+
309
def _check_consistency(self, skill: Skill) -> list[Finding]:
    """
    Compare what the manifest declares with what the bundled code appears to do.

    Reports, in order: network usage that the manifest does not declare,
    allowed-tools violations, and a description/behavior mismatch.

    Args:
        skill: Skill to check

    Returns:
        List of consistency findings
    """
    findings = []

    network_used = self._skill_uses_network(skill)
    network_declared = self._manifest_declares_network(skill)

    if network_used and not network_declared:
        undeclared_network = Finding(
            id=self._generate_finding_id("TOOL_MISMATCH_NETWORK", skill.name),
            rule_id="TOOL_ABUSE_UNDECLARED_NETWORK",
            category=ThreatCategory.UNAUTHORIZED_TOOL_USE,
            severity=Severity.MEDIUM,
            title="Undeclared network usage",
            description="Skill code uses network libraries but doesn't declare network requirement",
            file_path=None,
            remediation="Declare network usage in compatibility field or remove network calls",
            analyzer="static",
        )
        findings.append(undeclared_network)

    findings += self._check_allowed_tools_violations(skill)

    if self._check_description_mismatch(skill):
        misleading_description = Finding(
            id=self._generate_finding_id("DESC_BEHAVIOR_MISMATCH", skill.name),
            rule_id="SOCIAL_ENG_MISLEADING_DESC",
            category=ThreatCategory.SOCIAL_ENGINEERING,
            severity=Severity.MEDIUM,
            title="Potential description-behavior mismatch",
            description="Skill performs actions not reflected in its description",
            file_path="SKILL.md",
            remediation="Ensure description accurately reflects all skill capabilities",
            analyzer="static",
        )
        findings.append(misleading_description)

    return findings
349
+
350
def _scan_referenced_files(self, skill: Skill) -> list[Finding]:
    """Scan the skill's referenced files, following nested references up to depth 5."""
    nested_findings = self._scan_references_recursive(skill, skill.referenced_files, max_depth=5)
    return list(nested_findings)
355
+
356
def _scan_references_recursive(
    self,
    skill: Skill,
    references: list[str],
    max_depth: int = 5,
    current_depth: int = 0,
    visited: set[str] | None = None,
) -> list[Finding]:
    """
    Recursively scan referenced files up to a maximum depth.

    This detects lazy-loaded content that might contain malicious patterns
    hidden in nested references.

    Each reference is resolved against the skill directory first and then
    against its ``references``, ``assets``, ``templates`` and ``scripts``
    sub-directories; references that resolve to no existing file are ignored.
    A file that cannot be read or scanned is skipped and the failure is
    logged at debug level, so one bad reference does not abort the scan.

    Args:
        skill: The skill being analyzed
        references: List of file paths to scan
        max_depth: Maximum recursion depth
        current_depth: Current depth in recursion
        visited: Set of already-visited files to prevent cycles

    Returns:
        List of findings from all referenced files
    """
    findings = []

    if visited is None:
        visited = set()

    # Past the depth limit nothing more is read; pending references are
    # reported as a single finding.
    if current_depth > max_depth:
        if references:
            findings.append(
                Finding(
                    id=self._generate_finding_id("LAZY_LOAD_DEEP", str(current_depth)),
                    rule_id="LAZY_LOAD_DEEP_NESTING",
                    category=ThreatCategory.OBFUSCATION,
                    severity=Severity.MEDIUM,
                    title="Deeply nested file references detected",
                    description=(
                        f"Skill has file references nested more than {max_depth} levels deep. "
                        f"This could be an attempt to hide malicious content in files that are "
                        f"only loaded under specific conditions."
                    ),
                    file_path="SKILL.md",
                    remediation="Flatten the reference structure or ensure all nested files are safe",
                    analyzer="static",
                )
            )
        return findings

    for ref_file_path in references:
        if ref_file_path in visited:
            continue
        visited.add(ref_file_path)

        full_path = skill.directory / ref_file_path
        if not full_path.exists():
            alt_paths = [
                skill.directory / "references" / ref_file_path,
                skill.directory / "assets" / ref_file_path,
                skill.directory / "templates" / ref_file_path,
                skill.directory / "scripts" / ref_file_path,
            ]
            for alt in alt_paths:
                if alt.exists():
                    full_path = alt
                    break

        if not full_path.exists():
            continue

        try:
            with open(full_path, encoding="utf-8") as f:
                content = f.read()

            suffix = full_path.suffix.lower()
            if suffix in (".md", ".markdown"):
                rules = self.rule_loader.get_rules_for_file_type("markdown")
            elif suffix == ".py":
                rules = self.rule_loader.get_rules_for_file_type("python")
            elif suffix in (".sh", ".bash"):
                rules = self.rule_loader.get_rules_for_file_type("bash")
            else:
                # Unknown file types are not rule-scanned, but are still
                # searched for nested references below.
                rules = []

            for rule in rules:
                matches = rule.scan_content(content, ref_file_path)
                for match in matches:
                    finding = self._create_finding_from_match(rule, match)
                    finding.metadata["reference_depth"] = current_depth
                    findings.append(finding)

            nested_refs = self._extract_references_from_content(full_path, content)
            if nested_refs:
                findings.extend(
                    self._scan_references_recursive(skill, nested_refs, max_depth, current_depth + 1, visited)
                )

        except Exception as e:
            # Best effort: keep scanning the remaining references, but leave a
            # trace so a skipped file is not silently treated as clean.
            logger.debug("Skipping referenced file %s (depth %d): %s", ref_file_path, current_depth, e)

    return findings
458
+
459
def _extract_references_from_content(self, file_path: Path, content: str) -> list[str]:
    """
    Extract file references from content based on file type.

    - Markdown: targets of ``[text](target)`` links, except ``http://``,
      ``https://``, ``ftp://`` and ``#`` anchors.
    - Python: relative imports (``from .module import ...``), mapped to a file
      path. Dotted module names become directories
      (``from .pkg.mod import x`` -> ``pkg/mod.py``); extra leading dots are
      dropped, and an import with no module name (``from .. import x``)
      yields no reference.
    - Bash: scripts loaded with ``source`` or ``.``.

    Args:
        file_path: Path to the file
        content: File content

    Returns:
        List of referenced file paths
    """
    references = []
    suffix = file_path.suffix.lower()

    if suffix in (".md", ".markdown"):
        for _, link in _MARKDOWN_LINK_PATTERN.findall(content):
            if not link.startswith(("http://", "https://", "ftp://", "#")):
                references.append(link)

    elif suffix == ".py":
        for module in _PYTHON_IMPORT_PATTERN.findall(content):
            # The capture may hold extra leading dots and dotted sub-modules;
            # empty segments come from those dots and carry no path information.
            parts = [part for part in module.split(".") if part]
            if parts:
                references.append("/".join(parts) + ".py")

    elif suffix in (".sh", ".bash"):
        references.extend(_BASH_SOURCE_PATTERN.findall(content))

    return references
490
+
491
def _check_binary_files(self, skill: Skill) -> list[Finding]:
    """
    Report binary files in the skill package that are not recognised assets.

    Files whose type is "binary" are reported at INFO severity unless their
    extension is a font, image or archive extension listed below.

    Args:
        skill: Skill whose files are inspected

    Returns:
        One finding per unexpected binary file
    """
    asset_extensions = {
        # fonts
        ".ttf",
        ".otf",
        ".woff",
        ".woff2",
        ".eot",
        # images
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".svg",
        ".webp",
        ".ico",
        ".bmp",
        ".tiff",
        # archives
        ".tar.gz",
        ".tgz",
        ".zip",
    }

    findings = []

    for skill_file in skill.files:
        if skill_file.file_type != "binary":
            continue

        relative = Path(skill_file.relative_path)
        # Path.suffix only yields ".gz" for "x.tar.gz", so handle it explicitly.
        if relative.name.endswith(".tar.gz"):
            ext = ".tar.gz"
        else:
            ext = relative.suffix.lower()

        if ext in asset_extensions:
            continue

        findings.append(
            Finding(
                id=self._generate_finding_id("BINARY_FILE_DETECTED", skill_file.relative_path),
                rule_id="BINARY_FILE_DETECTED",
                category=ThreatCategory.POLICY_VIOLATION,
                severity=Severity.INFO,
                title="Binary file detected in skill package",
                description=f"Binary file found: {skill_file.relative_path}. "
                f"Binary files cannot be inspected by static analysis. "
                f"Consider using Python or Bash scripts for transparency.",
                file_path=skill_file.relative_path,
                remediation="Review binary file necessity. Replace with auditable scripts if possible.",
                analyzer="static",
            )
        )

    return findings
542
+
543
def _skill_uses_network(self, skill: Skill) -> bool:
    """
    Report whether any script of the skill appears to talk to the network.

    A script counts when it imports one of the listed HTTP client libraries,
    or when it imports ``socket``, contains a connect call, and does not
    mention a localhost address anywhere in its text.

    Args:
        skill: Skill whose scripts are inspected

    Returns:
        True as soon as one script matches, False otherwise
    """
    http_client_imports = (
        "import requests",
        "from requests import",
        "import urllib.request",
        "from urllib.request import",
        "import http.client",
        "import httpx",
        "import aiohttp",
    )
    connect_calls = ("socket.connect", "socket.create_connection")
    local_addresses = ("localhost", "127.0.0.1", "0.0.0.0")

    for script in skill.get_scripts():
        source = script.read_content()

        for marker in http_client_imports:
            if marker in source:
                return True

        if "import socket" not in source:
            continue

        connects = any(call in source for call in connect_calls)
        mentions_local = any(address in source for address in local_addresses)
        if connects and not mentions_local:
            return True

    return False
572
+
573
def _manifest_declares_network(self, skill: Skill) -> bool:
    """Return True when the manifest's compatibility text mentions "network" or "internet" (case-insensitive)."""
    compatibility = skill.manifest.compatibility
    if not compatibility:
        return False

    lowered = compatibility.lower()
    return any(keyword in lowered for keyword in ("network", "internet"))
579
+
580
def _check_description_mismatch(self, skill: Skill) -> bool:
    """
    Flag a skill whose description sounds simple but whose code uses the network.

    Basic heuristic: the description must contain one of a few keywords
    (calculator, format, template, style, lint) and the skill's scripts must
    appear to use the network.
    """
    description = skill.description.lower()

    sounds_simple = any(
        keyword in description
        for keyword in ("calculator", "format", "template", "style", "lint")
    )
    # The network check only runs when a keyword matched.
    return bool(sounds_simple and self._skill_uses_network(skill))
590
+
591
def _check_allowed_tools_violations(self, skill: Skill) -> list[Finding]:
    """
    Compare the manifest's allowed-tools list with what the bundled code does.

    Nothing is checked when the manifest declares no allowed tools. Otherwise
    each tool missing from the list (compared case-insensitively) is checked
    in this order: Read, Write, Bash, Python, Grep, Glob. Network usage is
    reported last, regardless of the list's content.

    Args:
        skill: Skill to check

    Returns:
        List of findings, one per detected violation
    """
    findings = []

    declared_tools = skill.manifest.allowed_tools
    if not declared_tools:
        return findings

    allowed = [tool.lower() for tool in declared_tools]

    def report(rule_id, severity, title, description, remediation):
        # All findings of this check share category, file path and analyzer;
        # the finding id is derived from the rule id and the skill name.
        findings.append(
            Finding(
                id=self._generate_finding_id(rule_id, skill.name),
                rule_id=rule_id,
                category=ThreatCategory.UNAUTHORIZED_TOOL_USE,
                severity=severity,
                title=title,
                description=description,
                file_path=None,
                remediation=remediation,
                analyzer="static",
            )
        )

    if "read" not in allowed and self._code_reads_files(skill):
        report(
            "ALLOWED_TOOLS_READ_VIOLATION",
            Severity.MEDIUM,
            "Code reads files but Read tool not in allowed-tools",
            (
                f"Skill restricts tools to {declared_tools} but bundled scripts appear to "
                f"read files from the filesystem."
            ),
            "Add 'Read' to allowed-tools or remove file reading operations from scripts",
        )

    if "write" not in allowed and self._code_writes_files(skill):
        report(
            "ALLOWED_TOOLS_WRITE_VIOLATION",
            Severity.HIGH,
            "Skill declares no Write tool but bundled scripts write files",
            (
                f"Skill restricts tools to {declared_tools} but bundled scripts appear to "
                f"write to the filesystem, which conflicts with a read-only tool declaration."
            ),
            "Either add 'Write' to allowed-tools (if intentional) or remove filesystem writes from scripts",
        )

    if "bash" not in allowed and self._code_executes_bash(skill):
        report(
            "ALLOWED_TOOLS_BASH_VIOLATION",
            Severity.HIGH,
            "Code executes bash but Bash tool not in allowed-tools",
            f"Skill restricts tools to {declared_tools} but code executes bash commands",
            "Add 'Bash' to allowed-tools or remove bash execution from code",
        )

    if "python" not in allowed:
        # The mere presence of a Python file counts; its content is not inspected.
        has_python_scripts = any(
            f.file_type == "python" and f.relative_path != "SKILL.md" for f in skill.files
        )
        if has_python_scripts:
            report(
                "ALLOWED_TOOLS_PYTHON_VIOLATION",
                Severity.HIGH,
                "Python scripts present but Python tool not in allowed-tools",
                f"Skill restricts tools to {declared_tools} but includes Python scripts",
                "Add 'Python' to allowed-tools or remove Python scripts",
            )

    if "grep" not in allowed and self._code_uses_grep(skill):
        report(
            "ALLOWED_TOOLS_GREP_VIOLATION",
            Severity.LOW,
            "Code uses search/grep patterns but Grep tool not in allowed-tools",
            f"Skill restricts tools to {declared_tools} but code uses regex search patterns",
            "Add 'Grep' to allowed-tools or remove regex search operations",
        )

    if "glob" not in allowed and self._code_uses_glob(skill):
        report(
            "ALLOWED_TOOLS_GLOB_VIOLATION",
            Severity.LOW,
            "Code uses glob/file patterns but Glob tool not in allowed-tools",
            f"Skill restricts tools to {declared_tools} but code uses glob patterns",
            "Add 'Glob' to allowed-tools or remove glob operations",
        )

    if self._code_uses_network(skill):
        report(
            "ALLOWED_TOOLS_NETWORK_USAGE",
            Severity.MEDIUM,
            "Code makes network requests",
            (
                "Skill code makes network requests. While not controlled by allowed-tools, "
                "network access should be documented and justified in the skill description."
            ),
            "Document network usage in skill description or remove network operations if not needed",
        )

    return findings
722
+
723
def _code_reads_files(self, skill: Skill) -> bool:
    """Report whether any script of the skill matches a file-read pattern.

    Each script returned by ``skill.get_scripts()`` has its content read once
    and tested against every regex in ``_READ_PATTERNS``. Scanning stops at
    the first hit.

    Args:
        skill: Skill whose scripts are inspected.

    Returns:
        True if at least one pattern matches in at least one script,
        otherwise False.
    """
    for script in skill.get_scripts():
        source_text = script.read_content()
        if any(regex.search(source_text) for regex in _READ_PATTERNS):
            return True
    return False
731
+
732
def _code_writes_files(self, skill: Skill) -> bool:
    """Report whether any script of the skill matches a file-write pattern.

    Each script returned by ``skill.get_scripts()`` has its content read once
    and tested against every regex in ``_WRITE_PATTERNS``. Scanning stops at
    the first hit.

    Args:
        skill: Skill whose scripts are inspected.

    Returns:
        True if at least one pattern matches in at least one script,
        otherwise False.
    """
    for script in skill.get_scripts():
        source_text = script.read_content()
        if any(regex.search(source_text) for regex in _WRITE_PATTERNS):
            return True
    return False
740
+
741
def _code_executes_bash(self, skill: Skill) -> bool:
    """Report whether the skill runs bash/shell commands.

    Args:
        skill: Skill whose files and scripts are inspected.

    Returns:
        True when any entry of ``skill.files`` has ``file_type == "bash"``,
        or when any script's content contains one of the shell-execution
        markers below as a plain substring. False otherwise.
    """
    # A bundled bash script counts as shell execution by itself; no script
    # content is read in that case.
    for bundled in skill.files:
        if bundled.file_type == "bash":
            return True

    shell_markers = (
        "subprocess.run",
        "subprocess.call",
        "subprocess.Popen",
        "subprocess.check_output",
        "os.system",
        "os.popen",
        "commands.getoutput",
        "shell=True",
    )

    for script in skill.get_scripts():
        source_text = script.read_content()
        for marker in shell_markers:
            if marker in source_text:
                return True
    return False
763
+
764
def _code_uses_grep(self, skill: Skill) -> bool:
    """Report whether any script of the skill matches a regex-search/grep pattern.

    Each script returned by ``skill.get_scripts()`` has its content read once
    and tested against every regex in ``_GREP_PATTERNS``. Scanning stops at
    the first hit.

    Args:
        skill: Skill whose scripts are inspected.

    Returns:
        True if at least one pattern matches in at least one script,
        otherwise False.
    """
    for script in skill.get_scripts():
        source_text = script.read_content()
        if any(regex.search(source_text) for regex in _GREP_PATTERNS):
            return True
    return False
772
+
773
def _code_uses_glob(self, skill: Skill) -> bool:
    """Report whether any script of the skill matches a glob/file-pattern regex.

    Each script returned by ``skill.get_scripts()`` has its content read once
    and tested against every regex in ``_GLOB_PATTERNS``. Scanning stops at
    the first hit.

    Args:
        skill: Skill whose scripts are inspected.

    Returns:
        True if at least one pattern matches in at least one script,
        otherwise False.
    """
    for script in skill.get_scripts():
        source_text = script.read_content()
        if any(regex.search(source_text) for regex in _GLOB_PATTERNS):
            return True
    return False
781
+
782
def _code_uses_network(self, skill: Skill) -> bool:
    """Report whether any script of the skill appears to make network requests.

    Detection is a plain substring test of each script's content against the
    markers listed below; no parsing of the code is performed.

    Args:
        skill: Skill whose scripts are inspected.

    Returns:
        True if any marker occurs in any script, otherwise False.
    """
    network_markers = (
        "requests.get",
        "requests.post",
        "requests.put",
        "requests.delete",
        "requests.patch",
        "urllib.request",
        "urllib.urlopen",
        "http.client",
        "httpx.",
        "aiohttp.",
        "socket.connect",
        "socket.create_connection",
    )

    for script in skill.get_scripts():
        source_text = script.read_content()
        for marker in network_markers:
            if marker in source_text:
                return True
    return False
804
+
805
def _scan_asset_files(self, skill: Skill) -> list[Finding]:
    """Scan asset-like files for prompt-injection phrases and suspicious URLs.

    A file in ``skill.files`` is scanned when any of the following holds:

    * its path has more than one component and the first one is ``assets``,
      ``templates``, ``references`` or ``data``;
    * its path ends in ``.template``, ``.tmpl`` or ``.tpl``;
    * its ``file_type`` is ``"other"`` and its path ends in ``.txt``,
      ``.json``, ``.yaml`` or ``.yml``.

    Files with empty content are skipped. Every regex match produces one
    Finding.

    Args:
        skill: Skill whose files are inspected.

    Returns:
        One Finding per pattern match, carrying the 1-based line number of
        the match and the first 100 characters of that line as snippet.
    """
    findings = []

    asset_dirs = ("assets", "templates", "references", "data")
    template_suffixes = (".template", ".tmpl", ".tpl")
    data_suffixes = (".txt", ".json", ".yaml", ".yml")

    # (compiled regex, rule id, severity, finding title)
    asset_patterns = [
        (
            re.compile(r"ignore\s+(all\s+)?previous\s+instructions?", re.IGNORECASE),
            "ASSET_PROMPT_INJECTION",
            Severity.HIGH,
            "Prompt injection pattern in asset file",
        ),
        (
            re.compile(r"disregard\s+(all\s+)?prior", re.IGNORECASE),
            "ASSET_PROMPT_INJECTION",
            Severity.HIGH,
            "Prompt override pattern in asset file",
        ),
        (
            re.compile(r"you\s+are\s+now\s+", re.IGNORECASE),
            "ASSET_PROMPT_INJECTION",
            Severity.MEDIUM,
            "Role reassignment pattern in asset file",
        ),
        (
            re.compile(r"https?://[^\s]+\.(tk|ml|ga|cf|gq)/", re.IGNORECASE),
            "ASSET_SUSPICIOUS_URL",
            Severity.MEDIUM,
            "Suspicious free domain URL in asset",
        ),
    ]

    for skill_file in skill.files:
        relative_path = skill_file.relative_path
        path_parts = relative_path.split("/")

        is_asset_file = (
            (len(path_parts) > 1 and path_parts[0] in asset_dirs)
            or relative_path.endswith(template_suffixes)
            or (skill_file.file_type == "other" and relative_path.endswith(data_suffixes))
        )
        if not is_asset_file:
            continue

        content = skill_file.read_content()
        if not content:
            continue

        # Split once per file instead of once per regex match.
        content_lines = content.split("\n")

        for pattern, rule_id, severity, title in asset_patterns:
            # The category depends only on the rule id, so resolve it once
            # per pattern rather than once per match.
            if "PROMPT" in rule_id:
                category = ThreatCategory.PROMPT_INJECTION
            elif "CODE" in rule_id or "SCRIPT" in rule_id:
                category = ThreatCategory.COMMAND_INJECTION
            elif "BASE64" in rule_id:
                category = ThreatCategory.OBFUSCATION
            else:
                category = ThreatCategory.POLICY_VIOLATION

            for match in pattern.finditer(content):
                # Count newlines before the match without copying the prefix.
                line_number = content.count("\n", 0, match.start()) + 1

                # NOTE(review): the id is derived from rule id, path and line
                # only, so two matches of the same rule id on one line share
                # the same finding id.
                findings.append(
                    Finding(
                        id=self._generate_finding_id(rule_id, f"{relative_path}:{line_number}"),
                        rule_id=rule_id,
                        category=category,
                        severity=severity,
                        title=title,
                        description=f"Pattern '{match.group()[:50]}...' detected in asset file",
                        file_path=relative_path,
                        line_number=line_number,
                        snippet=content_lines[line_number - 1][:100],
                        remediation="Review the asset file and remove any malicious or unnecessary dynamic patterns",
                        analyzer="static",
                    )
                )

    return findings
887
+
888
def _create_finding_from_match(self, rule: SecurityRule, match: dict[str, Any]) -> Finding:
    """Build a Finding from a rule and one of its match dictionaries.

    The rule's category value is upper-cased with underscores replaced by
    spaces and looked up through ``ThreatMapping.get_threat_mapping`` under
    the ``"static"`` analyzer. A ``ValueError`` or ``AttributeError`` raised
    during that lookup is tolerated and leaves the taxonomy metadata as None.

    Args:
        rule: Rule that matched; supplies id, category, severity,
            description (used as title) and remediation.
        match: Match details read via ``file_path``, ``line_number``,
            ``line_content``, ``matched_text`` and ``matched_pattern`` keys.

    Returns:
        The Finding describing this match.
    """
    try:
        lookup_name = rule.category.value.upper().replace("_", " ")
        taxonomy = ThreatMapping.get_threat_mapping("static", lookup_name)
    except (ValueError, AttributeError):
        # Best effort: a missing mapping only blanks the taxonomy metadata.
        taxonomy = None

    def taxonomy_field(key):
        return taxonomy.get(key) if taxonomy else None

    location = f"{match.get('file_path', 'unknown')}:{match.get('line_number', 0)}"
    extra = {
        "matched_pattern": match.get("matched_pattern"),
        "matched_text": match.get("matched_text"),
        "aitech": taxonomy_field("aitech"),
        "aitech_name": taxonomy_field("aitech_name"),
        "scanner_category": taxonomy_field("scanner_category"),
    }

    return Finding(
        id=self._generate_finding_id(rule.id, location),
        rule_id=rule.id,
        category=rule.category,
        severity=rule.severity,
        title=rule.description,
        description=f"Pattern detected: {match.get('matched_text', 'N/A')}",
        file_path=match.get("file_path"),
        line_number=match.get("line_number"),
        snippet=match.get("line_content"),
        remediation=rule.remediation,
        analyzer="static",
        metadata=extra,
    )
917
+
918
def _generate_finding_id(self, rule_id: str, context: str) -> str:
    """Derive a deterministic finding id from a rule id and a context string.

    Args:
        rule_id: Identifier of the rule that produced the finding.
        context: Free-form context (callers pass e.g. ``path:line``).

    Returns:
        ``rule_id`` followed by an underscore and the first 10 hex digits of
        the SHA-256 of ``"<rule_id>:<context>"``. Identical inputs always
        yield the same id.
    """
    digest = hashlib.sha256(f"{rule_id}:{context}".encode()).hexdigest()
    short_digest = digest[:10]
    return f"{rule_id}_{short_digest}"
923
+
924
def _yara_scan(self, skill: Skill) -> list[Finding]:
    """Run the YARA scanner over the skill instructions and its scripts.

    The instruction body is scanned under the name ``SKILL.md``. Each script
    with non-empty content is scanned under its relative path; for scripts,
    matches of the ``skill_discovery_abuse`` rule are ignored.

    Args:
        skill: Skill to scan.

    Returns:
        Findings produced from all retained YARA matches.
    """
    collected = []

    for hit in self.yara_scanner.scan_content(skill.instruction_body, "SKILL.md"):
        collected.extend(self._create_findings_from_yara_match(hit, skill))

    for script in skill.get_scripts():
        script_text = script.read_content()
        if not script_text:
            continue

        for hit in self.yara_scanner.scan_content(script_text, script.relative_path):
            # This rule is only honoured for the instruction body above.
            if hit.get("rule_name", "") == "skill_discovery_abuse":
                continue
            collected.extend(self._create_findings_from_yara_match(hit, skill, script_text))

    return collected
943
+
944
def _create_findings_from_yara_match(
    self, match: dict[str, Any], skill: Skill, file_content: str | None = None
) -> list[Finding]:
    """Convert one YARA rule match into Finding objects, one per matched string.

    Two false-positive filters are applied before a Finding is emitted:

    * ``code_execution`` matches are dropped when the matched line, the
      matched data or the surrounding lines of the file mention one of the
      allow-listed command names as a whole word.
    * ``system_manipulation`` matches on an ``rm -r`` line are dropped when
      every target extracted by ``_RM_TARGET_PATTERN`` refers to an
      allow-listed cleanup directory.

    Args:
        match: YARA match with ``rule_name``, ``namespace``, ``file_path``,
            ``meta`` and ``strings`` keys.
        skill: Skill the match belongs to (not read by this method).
        file_content: Full text of the scanned file, used to inspect the
            lines surrounding a match. When omitted, only the matched line
            and matched data are inspected.

    Returns:
        Findings for every matched string that survives the filters.
    """
    findings = []

    rule_name = match["rule_name"]
    namespace = match["namespace"]
    file_path = match["file_path"]
    meta = match["meta"].get("meta", {})

    category, severity = self._map_yara_rule_to_threat(rule_name, meta)

    SAFE_COMMANDS = {
        "soffice",
        "pandoc",
        "wkhtmltopdf",
        "convert",
        "gs",
        "pdftotext",
        "pdfinfo",
        "pdftoppm",
        "pdftohtml",
        "tesseract",
        "ffmpeg",
        "ffprobe",
        "zip",
        "unzip",
        "tar",
        "gzip",
        "gunzip",
        "bzip2",
        "bunzip2",
        "xz",
        "unxz",
        "7z",
        "7za",
        "gtimeout",
        "timeout",
        "grep",
        "head",
        "tail",
        "sort",
        "uniq",
        "wc",
        "file",
        "git",
    }

    SAFE_CLEANUP_DIRS = {
        "dist",
        "build",
        "tmp",
        "temp",
        ".tmp",
        ".temp",
        "bundle.html",
        "bundle.js",
        "bundle.css",
        "node_modules",
        ".next",
        ".nuxt",
        ".cache",
    }

    # Values that do not depend on the individual matched string.
    rule_id = f"YARA_{rule_name}"
    description = meta.get("description", f"YARA rule {rule_name} matched")
    threat_type = meta.get("threat_type", "SECURITY THREAT")
    # Split the file once instead of once per matched string.
    file_lines = file_content.split("\n") if file_content else []

    for string_match in match["strings"]:
        if rule_name == "code_execution":
            line_num = string_match.get("line_number", 0)
            # NOTE(review): if line_number is 1-based this window covers 3
            # lines before and 5 lines after the match - confirm intended.
            nearby_lines = file_lines[max(0, line_num - 4) : line_num + 5] if line_num > 0 else []

            inspected_text = "\n".join(
                (
                    string_match.get("line_content", ""),
                    string_match.get("matched_data", ""),
                    *nearby_lines,
                )
            ).lower()

            # Compare whole words, not substrings: short names such as "gs",
            # "tar" or "git" would otherwise match inside "args", "start" or
            # "digit" and suppress unrelated findings.
            if not SAFE_COMMANDS.isdisjoint(re.findall(r"\w+", inspected_text)):
                continue

        if rule_name == "system_manipulation":
            line_content = string_match.get("line_content", "").lower()

            # "rm -r" is a prefix of "rm -rf", so one test covers both.
            if "rm -r" in line_content:
                rm_targets = _RM_TARGET_PATTERN.findall(line_content)
                # NOTE(review): containment test against each target; whether
                # that is a substring or element test depends on what
                # _RM_TARGET_PATTERN captures - verify against its definition.
                if rm_targets and all(
                    any(safe_dir in target for safe_dir in SAFE_CLEANUP_DIRS) for target in rm_targets
                ):
                    continue

        findings.append(
            Finding(
                id=self._generate_finding_id(rule_id, f"{file_path}:{string_match['line_number']}"),
                rule_id=rule_id,
                category=category,
                severity=severity,
                title=f"{threat_type} detected by YARA",
                description=f"{description}: {string_match['matched_data'][:100]}",
                file_path=file_path,
                line_number=string_match["line_number"],
                snippet=string_match["line_content"],
                remediation=f"Review and remove {threat_type.lower()} pattern",
                analyzer="static",
                metadata={
                    "yara_rule": rule_name,
                    "yara_namespace": namespace,
                    "matched_string": string_match["identifier"],
                    "threat_type": threat_type,
                },
            )
        )

    return findings
1071
+
1072
def _map_yara_rule_to_threat(self, rule_name: str, meta: dict[str, Any]) -> tuple:
    """Translate YARA rule metadata into a (ThreatCategory, Severity) pair.

    Args:
        rule_name: Name of the YARA rule (not consulted by the mapping).
        meta: Rule metadata; ``threat_type`` (default ``""``) selects the
            category and ``classification`` (default ``"harmful"``) gates
            the severity.

    Returns:
        ``(category, severity)``. Unknown threat types map to
        ``ThreatCategory.POLICY_VIOLATION``. Any classification other than
        ``"harmful"`` yields ``Severity.LOW``.
    """
    threat_type = meta.get("threat_type", "").upper()
    is_harmful = meta.get("classification", "harmful") == "harmful"

    categories_by_threat = {
        "PROMPT INJECTION": ThreatCategory.PROMPT_INJECTION,
        "INJECTION ATTACK": ThreatCategory.COMMAND_INJECTION,
        "COMMAND INJECTION": ThreatCategory.COMMAND_INJECTION,
        "CREDENTIAL HARVESTING": ThreatCategory.HARDCODED_SECRETS,
        "DATA EXFILTRATION": ThreatCategory.DATA_EXFILTRATION,
        "SYSTEM MANIPULATION": ThreatCategory.UNAUTHORIZED_TOOL_USE,
        "CODE EXECUTION": ThreatCategory.COMMAND_INJECTION,
        "SQL INJECTION": ThreatCategory.COMMAND_INJECTION,
        "SKILL DISCOVERY ABUSE": ThreatCategory.SKILL_DISCOVERY_ABUSE,
        "TRANSITIVE TRUST ABUSE": ThreatCategory.TRANSITIVE_TRUST_ABUSE,
        "AUTONOMY ABUSE": ThreatCategory.AUTONOMY_ABUSE,
        "TOOL CHAINING ABUSE": ThreatCategory.TOOL_CHAINING_ABUSE,
        "UNICODE STEGANOGRAPHY": ThreatCategory.UNICODE_STEGANOGRAPHY,
    }
    category = categories_by_threat.get(threat_type, ThreatCategory.POLICY_VIOLATION)

    # Guard clauses, ordered from most to least severe keyword group.
    if not is_harmful:
        return category, Severity.LOW
    if any(keyword in threat_type for keyword in ("INJECTION", "CREDENTIAL")):
        return category, Severity.CRITICAL
    if any(keyword in threat_type for keyword in ("EXFILTRATION", "MANIPULATION")):
        return category, Severity.HIGH
    return category, Severity.MEDIUM