cisco-ai-skill-scanner 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
- skillanalyzer/__init__.py +45 -0
- skillanalyzer/_version.py +34 -0
- skillanalyzer/api/__init__.py +25 -0
- skillanalyzer/api/api.py +34 -0
- skillanalyzer/api/api_cli.py +78 -0
- skillanalyzer/api/api_server.py +634 -0
- skillanalyzer/api/router.py +527 -0
- skillanalyzer/cli/__init__.py +25 -0
- skillanalyzer/cli/cli.py +816 -0
- skillanalyzer/config/__init__.py +26 -0
- skillanalyzer/config/config.py +149 -0
- skillanalyzer/config/config_parser.py +122 -0
- skillanalyzer/config/constants.py +85 -0
- skillanalyzer/core/__init__.py +24 -0
- skillanalyzer/core/analyzers/__init__.py +75 -0
- skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
- skillanalyzer/core/analyzers/base.py +53 -0
- skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
- skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
- skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
- skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
- skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
- skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
- skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
- skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
- skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
- skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
- skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
- skillanalyzer/core/analyzers/static.py +1105 -0
- skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
- skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
- skillanalyzer/core/exceptions.py +77 -0
- skillanalyzer/core/loader.py +377 -0
- skillanalyzer/core/models.py +300 -0
- skillanalyzer/core/reporters/__init__.py +26 -0
- skillanalyzer/core/reporters/json_reporter.py +65 -0
- skillanalyzer/core/reporters/markdown_reporter.py +209 -0
- skillanalyzer/core/reporters/sarif_reporter.py +246 -0
- skillanalyzer/core/reporters/table_reporter.py +195 -0
- skillanalyzer/core/rules/__init__.py +19 -0
- skillanalyzer/core/rules/patterns.py +165 -0
- skillanalyzer/core/rules/yara_scanner.py +157 -0
- skillanalyzer/core/scanner.py +437 -0
- skillanalyzer/core/static_analysis/__init__.py +27 -0
- skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
- skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
- skillanalyzer/core/static_analysis/context_extractor.py +742 -0
- skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
- skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
- skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
- skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
- skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
- skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
- skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
- skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
- skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
- skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
- skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
- skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
- skillanalyzer/core/static_analysis/types/__init__.py +36 -0
- skillanalyzer/data/__init__.py +30 -0
- skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
- skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
- skillanalyzer/data/prompts/llm_response_schema.json +71 -0
- skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
- skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
- skillanalyzer/data/prompts/unified_response_schema.md +97 -0
- skillanalyzer/data/rules/signatures.yaml +440 -0
- skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
- skillanalyzer/data/yara_rules/code_execution.yara +61 -0
- skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
- skillanalyzer/data/yara_rules/command_injection.yara +54 -0
- skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
- skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
- skillanalyzer/data/yara_rules/script_injection.yara +83 -0
- skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
- skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
- skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
- skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
- skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
- skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
- skillanalyzer/hooks/__init__.py +21 -0
- skillanalyzer/hooks/pre_commit.py +450 -0
- skillanalyzer/threats/__init__.py +25 -0
- skillanalyzer/threats/threats.py +480 -0
- skillanalyzer/utils/__init__.py +28 -0
- skillanalyzer/utils/command_utils.py +129 -0
- skillanalyzer/utils/di_container.py +154 -0
- skillanalyzer/utils/file_utils.py +86 -0
- skillanalyzer/utils/logging_config.py +96 -0
- skillanalyzer/utils/logging_utils.py +71 -0
|
@@ -0,0 +1,1105 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Static pattern analyzer for detecting security vulnerabilities.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import hashlib
|
|
22
|
+
import logging
|
|
23
|
+
import re
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
from ...core.models import Finding, Severity, Skill, ThreatCategory
|
|
28
|
+
from ...core.rules.patterns import RuleLoader, SecurityRule
|
|
29
|
+
from ...core.rules.yara_scanner import YaraScanner
|
|
30
|
+
from ...threats.threats import ThreatMapping
|
|
31
|
+
from .base import BaseAnalyzer
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
# Pre-compiled regex patterns for file operation checks
|
|
36
|
+
# Heuristic signatures of file-read operations (open()/.read*/Path.read_*).
_READ_PATTERNS = [
    re.compile(r"open\([^)]+['\"]r['\"]"),
    re.compile(r"open\([^)]+\)"),
    re.compile(r"\.read\("),
    re.compile(r"\.readline\("),
    re.compile(r"\.readlines\("),
    re.compile(r"Path\([^)]+\)\.read_text"),
    re.compile(r"Path\([^)]+\)\.read_bytes"),
    re.compile(r"with\s+open\([^)]+['\"]r"),
]

# Heuristic signatures of file-write operations.
_WRITE_PATTERNS = [
    re.compile(r"open\([^)]+['\"]w['\"]"),
    re.compile(r"\.write\("),
    re.compile(r"\.writelines\("),
    re.compile(r"pathlib\.Path\([^)]+\)\.write"),
    re.compile(r"with\s+open\([^)]+['\"]w"),
]

# Heuristic signatures of content-search (grep-like) operations.
_GREP_PATTERNS = [
    re.compile(r"re\.search\("),
    re.compile(r"re\.findall\("),
    re.compile(r"re\.match\("),
    re.compile(r"re\.finditer\("),
    re.compile(r"re\.sub\("),
    re.compile(r"\.search\("),
    re.compile(r"\.findall\("),
    re.compile(r"grep"),
]

# Heuristic signatures of filename-globbing operations.
_GLOB_PATTERNS = [
    re.compile(r"glob\.glob\("),
    re.compile(r"glob\.iglob\("),
    re.compile(r"Path\([^)]*\)\.glob\("),
    re.compile(r"\.glob\("),
    re.compile(r"\.rglob\("),
    re.compile(r"fnmatch\."),
]

# Constructs that can terminate a `while True` loop; used by
# _is_loop_with_exception_handler to suppress infinite-loop false positives.
_EXCEPTION_PATTERNS = [
    re.compile(r"except\s+(EOFError|StopIteration|KeyboardInterrupt|Exception|BaseException)"),
    re.compile(r"except\s*:"),
    re.compile(r"break\s*$", re.MULTILINE),
    re.compile(r"return\s*$", re.MULTILINE),
    re.compile(r"sys\.exit\s*\("),
    re.compile(r"raise\s+StopIteration"),
]

# Claude Skills naming rule: lowercase letters, digits, and hyphens only
# (length is checked separately in _check_manifest).
_SKILL_NAME_PATTERN = re.compile(r"[a-z0-9-]+")
# Markdown inline link `[text](target)`; group 1 = text, group 2 = target.
_MARKDOWN_LINK_PATTERN = re.compile(r"\[([^\]]+)\]\(([^\)]+)\)")
# Relative Python import (`from .module import ...`), matched per line.
_PYTHON_IMPORT_PATTERN = re.compile(r"^from\s+\.([A-Za-z0-9_.]*)\s+import", re.MULTILINE)
# Shell `source`/`.` of another .sh/.bash file; captures the sourced path.
_BASH_SOURCE_PATTERN = re.compile(r"(?:source|\.)\s+([A-Za-z0-9_\-./]+\.(?:sh|bash))")
# Target operand of a recursive `rm -r` command.
_RM_TARGET_PATTERN = re.compile(r"rm\s+-r[^;]*?\s+([^\s;]+)")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class StaticAnalyzer(BaseAnalyzer):
|
|
92
|
+
"""Static pattern-based security analyzer."""
|
|
93
|
+
|
|
94
|
+
def __init__(self, rules_file: Path | None = None, use_yara: bool = True):
|
|
95
|
+
"""
|
|
96
|
+
Initialize static analyzer.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
rules_file: Optional custom rules file
|
|
100
|
+
use_yara: Whether to use YARA scanning (default: True)
|
|
101
|
+
"""
|
|
102
|
+
super().__init__("static_analyzer")
|
|
103
|
+
|
|
104
|
+
self.rule_loader = RuleLoader(rules_file)
|
|
105
|
+
self.rule_loader.load_rules()
|
|
106
|
+
|
|
107
|
+
self.use_yara = use_yara
|
|
108
|
+
self.yara_scanner = None
|
|
109
|
+
if use_yara:
|
|
110
|
+
try:
|
|
111
|
+
self.yara_scanner = YaraScanner()
|
|
112
|
+
except Exception as e:
|
|
113
|
+
logger.warning("Could not load YARA scanner: %s", e)
|
|
114
|
+
self.yara_scanner = None
|
|
115
|
+
|
|
116
|
+
def analyze(self, skill: Skill) -> list[Finding]:
|
|
117
|
+
"""
|
|
118
|
+
Analyze skill using static pattern matching.
|
|
119
|
+
|
|
120
|
+
Performs multi-pass scanning:
|
|
121
|
+
1. Manifest validation
|
|
122
|
+
2. Instruction body scanning (SKILL.md)
|
|
123
|
+
3. Script/code scanning
|
|
124
|
+
4. Consistency checks
|
|
125
|
+
5. Reference file scanning
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
skill: Skill to analyze
|
|
129
|
+
|
|
130
|
+
Returns:
|
|
131
|
+
List of security findings
|
|
132
|
+
"""
|
|
133
|
+
findings = []
|
|
134
|
+
|
|
135
|
+
findings.extend(self._check_manifest(skill))
|
|
136
|
+
findings.extend(self._scan_instruction_body(skill))
|
|
137
|
+
findings.extend(self._scan_scripts(skill))
|
|
138
|
+
findings.extend(self._check_consistency(skill))
|
|
139
|
+
findings.extend(self._scan_referenced_files(skill))
|
|
140
|
+
findings.extend(self._check_binary_files(skill))
|
|
141
|
+
|
|
142
|
+
if self.yara_scanner:
|
|
143
|
+
findings.extend(self._yara_scan(skill))
|
|
144
|
+
|
|
145
|
+
findings.extend(self._scan_asset_files(skill))
|
|
146
|
+
|
|
147
|
+
return findings
|
|
148
|
+
|
|
149
|
+
def _check_manifest(self, skill: Skill) -> list[Finding]:
|
|
150
|
+
"""Validate skill manifest for security issues."""
|
|
151
|
+
findings = []
|
|
152
|
+
manifest = skill.manifest
|
|
153
|
+
|
|
154
|
+
if len(manifest.name) > 64 or not _SKILL_NAME_PATTERN.fullmatch(manifest.name or ""):
|
|
155
|
+
findings.append(
|
|
156
|
+
Finding(
|
|
157
|
+
id=self._generate_finding_id("MANIFEST_INVALID_NAME", "manifest"),
|
|
158
|
+
rule_id="MANIFEST_INVALID_NAME",
|
|
159
|
+
category=ThreatCategory.POLICY_VIOLATION,
|
|
160
|
+
severity=Severity.LOW,
|
|
161
|
+
title="Skill name does not follow Claude Skills naming rules",
|
|
162
|
+
description=(
|
|
163
|
+
f"Skill name '{manifest.name}' is invalid. Claude Skills require lowercase letters, numbers, "
|
|
164
|
+
f"and hyphens only, with a maximum length of 64 characters."
|
|
165
|
+
),
|
|
166
|
+
file_path="SKILL.md",
|
|
167
|
+
remediation="Rename the skill to match `[a-z0-9-]{1,64}` (e.g., 'pdf-processing')",
|
|
168
|
+
analyzer="static",
|
|
169
|
+
)
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
if len(manifest.description or "") > 1024:
|
|
173
|
+
findings.append(
|
|
174
|
+
Finding(
|
|
175
|
+
id=self._generate_finding_id("MANIFEST_DESCRIPTION_TOO_LONG", "manifest"),
|
|
176
|
+
rule_id="MANIFEST_DESCRIPTION_TOO_LONG",
|
|
177
|
+
category=ThreatCategory.POLICY_VIOLATION,
|
|
178
|
+
severity=Severity.LOW,
|
|
179
|
+
title="Skill description exceeds Claude Skills length limit",
|
|
180
|
+
description=(
|
|
181
|
+
f"Skill description is {len(manifest.description)} characters; Claude Skills limit the "
|
|
182
|
+
f"`description` field to 1024 characters."
|
|
183
|
+
),
|
|
184
|
+
file_path="SKILL.md",
|
|
185
|
+
remediation="Shorten the description to 1024 characters or fewer while keeping it specific",
|
|
186
|
+
analyzer="static",
|
|
187
|
+
)
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
if len(manifest.description) < 20:
|
|
191
|
+
findings.append(
|
|
192
|
+
Finding(
|
|
193
|
+
id=self._generate_finding_id("SOCIAL_ENG_VAGUE_DESCRIPTION", "manifest"),
|
|
194
|
+
rule_id="SOCIAL_ENG_VAGUE_DESCRIPTION",
|
|
195
|
+
category=ThreatCategory.SOCIAL_ENGINEERING,
|
|
196
|
+
severity=Severity.LOW,
|
|
197
|
+
title="Vague skill description",
|
|
198
|
+
description=f"Skill description is too short ({len(manifest.description)} chars). Provide detailed explanation.",
|
|
199
|
+
file_path="SKILL.md",
|
|
200
|
+
remediation="Provide a clear, detailed description of what the skill does and when to use it",
|
|
201
|
+
analyzer="static",
|
|
202
|
+
)
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
description_lower = manifest.description.lower()
|
|
206
|
+
name_lower = manifest.name.lower()
|
|
207
|
+
is_anthropic_mentioned = "anthropic" in name_lower or "anthropic" in description_lower
|
|
208
|
+
|
|
209
|
+
if is_anthropic_mentioned:
|
|
210
|
+
legitimate_patterns = ["apply", "brand", "guidelines", "colors", "typography", "style"]
|
|
211
|
+
is_legitimate = any(pattern in description_lower for pattern in legitimate_patterns)
|
|
212
|
+
|
|
213
|
+
if not is_legitimate:
|
|
214
|
+
findings.append(
|
|
215
|
+
Finding(
|
|
216
|
+
id=self._generate_finding_id("SOCIAL_ENG_ANTHROPIC_IMPERSONATION", "manifest"),
|
|
217
|
+
rule_id="SOCIAL_ENG_ANTHROPIC_IMPERSONATION",
|
|
218
|
+
category=ThreatCategory.SOCIAL_ENGINEERING,
|
|
219
|
+
severity=Severity.MEDIUM,
|
|
220
|
+
title="Potential Anthropic brand impersonation",
|
|
221
|
+
description="Skill name or description contains 'Anthropic', suggesting official affiliation",
|
|
222
|
+
file_path="SKILL.md",
|
|
223
|
+
remediation="Do not impersonate official Anthropic skills or use Anthropic branding without authorization",
|
|
224
|
+
analyzer="static",
|
|
225
|
+
)
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
if "claude official" in manifest.name.lower() or "claude official" in manifest.description.lower():
|
|
229
|
+
findings.append(
|
|
230
|
+
Finding(
|
|
231
|
+
id=self._generate_finding_id("SOCIAL_ENG_CLAUDE_OFFICIAL", "manifest"),
|
|
232
|
+
rule_id="SOCIAL_ENG_ANTHROPIC_IMPERSONATION",
|
|
233
|
+
category=ThreatCategory.SOCIAL_ENGINEERING,
|
|
234
|
+
severity=Severity.HIGH,
|
|
235
|
+
title="Claims to be official Claude skill",
|
|
236
|
+
description="Skill claims to be an 'official' Claude skill",
|
|
237
|
+
file_path="SKILL.md",
|
|
238
|
+
remediation="Remove 'official' claims unless authorized by Anthropic",
|
|
239
|
+
analyzer="static",
|
|
240
|
+
)
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
if not manifest.license:
|
|
244
|
+
findings.append(
|
|
245
|
+
Finding(
|
|
246
|
+
id=self._generate_finding_id("MANIFEST_MISSING_LICENSE", "manifest"),
|
|
247
|
+
rule_id="MANIFEST_MISSING_LICENSE",
|
|
248
|
+
category=ThreatCategory.POLICY_VIOLATION,
|
|
249
|
+
severity=Severity.LOW,
|
|
250
|
+
title="Skill does not specify a license",
|
|
251
|
+
description="Skill manifest does not include a 'license' field. Specifying a license helps users understand usage terms.",
|
|
252
|
+
file_path="SKILL.md",
|
|
253
|
+
remediation="Add 'license' field to SKILL.md frontmatter (e.g., MIT, Apache-2.0)",
|
|
254
|
+
analyzer="static",
|
|
255
|
+
)
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
return findings
|
|
259
|
+
|
|
260
|
+
def _scan_instruction_body(self, skill: Skill) -> list[Finding]:
|
|
261
|
+
"""Scan SKILL.md instruction body for prompt injection patterns."""
|
|
262
|
+
findings = []
|
|
263
|
+
|
|
264
|
+
markdown_rules = self.rule_loader.get_rules_for_file_type("markdown")
|
|
265
|
+
|
|
266
|
+
for rule in markdown_rules:
|
|
267
|
+
matches = rule.scan_content(skill.instruction_body, "SKILL.md")
|
|
268
|
+
for match in matches:
|
|
269
|
+
findings.append(self._create_finding_from_match(rule, match))
|
|
270
|
+
|
|
271
|
+
return findings
|
|
272
|
+
|
|
273
|
+
def _scan_scripts(self, skill: Skill) -> list[Finding]:
|
|
274
|
+
"""Scan all script files (Python, Bash) for vulnerabilities."""
|
|
275
|
+
findings = []
|
|
276
|
+
|
|
277
|
+
for skill_file in skill.files:
|
|
278
|
+
if skill_file.file_type not in ("python", "bash"):
|
|
279
|
+
continue
|
|
280
|
+
|
|
281
|
+
rules = self.rule_loader.get_rules_for_file_type(skill_file.file_type)
|
|
282
|
+
|
|
283
|
+
content = skill_file.read_content()
|
|
284
|
+
if not content:
|
|
285
|
+
continue
|
|
286
|
+
|
|
287
|
+
for rule in rules:
|
|
288
|
+
matches = rule.scan_content(content, skill_file.relative_path)
|
|
289
|
+
for match in matches:
|
|
290
|
+
if rule.id == "RESOURCE_ABUSE_INFINITE_LOOP" and skill_file.file_type == "python":
|
|
291
|
+
if self._is_loop_with_exception_handler(content, match["line_number"]):
|
|
292
|
+
continue
|
|
293
|
+
findings.append(self._create_finding_from_match(rule, match))
|
|
294
|
+
|
|
295
|
+
return findings
|
|
296
|
+
|
|
297
|
+
def _is_loop_with_exception_handler(self, content: str, loop_line_num: int) -> bool:
|
|
298
|
+
"""Check if a while True loop has an exception handler in surrounding context."""
|
|
299
|
+
lines = content.split("\n")
|
|
300
|
+
context_lines = lines[loop_line_num - 1 : min(loop_line_num + 20, len(lines))]
|
|
301
|
+
context_text = "\n".join(context_lines)
|
|
302
|
+
|
|
303
|
+
for pattern in _EXCEPTION_PATTERNS:
|
|
304
|
+
if pattern.search(context_text):
|
|
305
|
+
return True
|
|
306
|
+
|
|
307
|
+
return False
|
|
308
|
+
|
|
309
|
+
def _check_consistency(self, skill: Skill) -> list[Finding]:
|
|
310
|
+
"""Check for inconsistencies between manifest and actual behavior."""
|
|
311
|
+
findings = []
|
|
312
|
+
|
|
313
|
+
uses_network = self._skill_uses_network(skill)
|
|
314
|
+
declared_network = self._manifest_declares_network(skill)
|
|
315
|
+
|
|
316
|
+
if uses_network and not declared_network:
|
|
317
|
+
findings.append(
|
|
318
|
+
Finding(
|
|
319
|
+
id=self._generate_finding_id("TOOL_MISMATCH_NETWORK", skill.name),
|
|
320
|
+
rule_id="TOOL_ABUSE_UNDECLARED_NETWORK",
|
|
321
|
+
category=ThreatCategory.UNAUTHORIZED_TOOL_USE,
|
|
322
|
+
severity=Severity.MEDIUM,
|
|
323
|
+
title="Undeclared network usage",
|
|
324
|
+
description="Skill code uses network libraries but doesn't declare network requirement",
|
|
325
|
+
file_path=None,
|
|
326
|
+
remediation="Declare network usage in compatibility field or remove network calls",
|
|
327
|
+
analyzer="static",
|
|
328
|
+
)
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
findings.extend(self._check_allowed_tools_violations(skill))
|
|
332
|
+
|
|
333
|
+
if self._check_description_mismatch(skill):
|
|
334
|
+
findings.append(
|
|
335
|
+
Finding(
|
|
336
|
+
id=self._generate_finding_id("DESC_BEHAVIOR_MISMATCH", skill.name),
|
|
337
|
+
rule_id="SOCIAL_ENG_MISLEADING_DESC",
|
|
338
|
+
category=ThreatCategory.SOCIAL_ENGINEERING,
|
|
339
|
+
severity=Severity.MEDIUM,
|
|
340
|
+
title="Potential description-behavior mismatch",
|
|
341
|
+
description="Skill performs actions not reflected in its description",
|
|
342
|
+
file_path="SKILL.md",
|
|
343
|
+
remediation="Ensure description accurately reflects all skill capabilities",
|
|
344
|
+
analyzer="static",
|
|
345
|
+
)
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
return findings
|
|
349
|
+
|
|
350
|
+
def _scan_referenced_files(self, skill: Skill) -> list[Finding]:
|
|
351
|
+
"""Scan files referenced in instruction body with recursive scanning."""
|
|
352
|
+
findings = []
|
|
353
|
+
findings.extend(self._scan_references_recursive(skill, skill.referenced_files, max_depth=5))
|
|
354
|
+
return findings
|
|
355
|
+
|
|
356
|
+
def _scan_references_recursive(
|
|
357
|
+
self,
|
|
358
|
+
skill: Skill,
|
|
359
|
+
references: list[str],
|
|
360
|
+
max_depth: int = 5,
|
|
361
|
+
current_depth: int = 0,
|
|
362
|
+
visited: set[str] | None = None,
|
|
363
|
+
) -> list[Finding]:
|
|
364
|
+
"""
|
|
365
|
+
Recursively scan referenced files up to a maximum depth.
|
|
366
|
+
|
|
367
|
+
This detects lazy-loaded content that might contain malicious patterns
|
|
368
|
+
hidden in nested references.
|
|
369
|
+
|
|
370
|
+
Args:
|
|
371
|
+
skill: The skill being analyzed
|
|
372
|
+
references: List of file paths to scan
|
|
373
|
+
max_depth: Maximum recursion depth
|
|
374
|
+
current_depth: Current depth in recursion
|
|
375
|
+
visited: Set of already-visited files to prevent cycles
|
|
376
|
+
|
|
377
|
+
Returns:
|
|
378
|
+
List of findings from all referenced files
|
|
379
|
+
"""
|
|
380
|
+
findings = []
|
|
381
|
+
|
|
382
|
+
if visited is None:
|
|
383
|
+
visited = set()
|
|
384
|
+
|
|
385
|
+
if current_depth > max_depth:
|
|
386
|
+
if references:
|
|
387
|
+
findings.append(
|
|
388
|
+
Finding(
|
|
389
|
+
id=self._generate_finding_id("LAZY_LOAD_DEEP", str(current_depth)),
|
|
390
|
+
rule_id="LAZY_LOAD_DEEP_NESTING",
|
|
391
|
+
category=ThreatCategory.OBFUSCATION,
|
|
392
|
+
severity=Severity.MEDIUM,
|
|
393
|
+
title="Deeply nested file references detected",
|
|
394
|
+
description=(
|
|
395
|
+
f"Skill has file references nested more than {max_depth} levels deep. "
|
|
396
|
+
f"This could be an attempt to hide malicious content in files that are "
|
|
397
|
+
f"only loaded under specific conditions."
|
|
398
|
+
),
|
|
399
|
+
file_path="SKILL.md",
|
|
400
|
+
remediation="Flatten the reference structure or ensure all nested files are safe",
|
|
401
|
+
analyzer="static",
|
|
402
|
+
)
|
|
403
|
+
)
|
|
404
|
+
return findings
|
|
405
|
+
|
|
406
|
+
for ref_file_path in references:
|
|
407
|
+
if ref_file_path in visited:
|
|
408
|
+
continue
|
|
409
|
+
visited.add(ref_file_path)
|
|
410
|
+
|
|
411
|
+
full_path = skill.directory / ref_file_path
|
|
412
|
+
if not full_path.exists():
|
|
413
|
+
alt_paths = [
|
|
414
|
+
skill.directory / "references" / ref_file_path,
|
|
415
|
+
skill.directory / "assets" / ref_file_path,
|
|
416
|
+
skill.directory / "templates" / ref_file_path,
|
|
417
|
+
skill.directory / "scripts" / ref_file_path,
|
|
418
|
+
]
|
|
419
|
+
for alt in alt_paths:
|
|
420
|
+
if alt.exists():
|
|
421
|
+
full_path = alt
|
|
422
|
+
break
|
|
423
|
+
|
|
424
|
+
if not full_path.exists():
|
|
425
|
+
continue
|
|
426
|
+
|
|
427
|
+
try:
|
|
428
|
+
with open(full_path, encoding="utf-8") as f:
|
|
429
|
+
content = f.read()
|
|
430
|
+
|
|
431
|
+
suffix = full_path.suffix.lower()
|
|
432
|
+
if suffix in (".md", ".markdown"):
|
|
433
|
+
rules = self.rule_loader.get_rules_for_file_type("markdown")
|
|
434
|
+
elif suffix == ".py":
|
|
435
|
+
rules = self.rule_loader.get_rules_for_file_type("python")
|
|
436
|
+
elif suffix in (".sh", ".bash"):
|
|
437
|
+
rules = self.rule_loader.get_rules_for_file_type("bash")
|
|
438
|
+
else:
|
|
439
|
+
rules = []
|
|
440
|
+
|
|
441
|
+
for rule in rules:
|
|
442
|
+
matches = rule.scan_content(content, ref_file_path)
|
|
443
|
+
for match in matches:
|
|
444
|
+
finding = self._create_finding_from_match(rule, match)
|
|
445
|
+
finding.metadata["reference_depth"] = current_depth
|
|
446
|
+
findings.append(finding)
|
|
447
|
+
|
|
448
|
+
nested_refs = self._extract_references_from_content(full_path, content)
|
|
449
|
+
if nested_refs:
|
|
450
|
+
findings.extend(
|
|
451
|
+
self._scan_references_recursive(skill, nested_refs, max_depth, current_depth + 1, visited)
|
|
452
|
+
)
|
|
453
|
+
|
|
454
|
+
except Exception:
|
|
455
|
+
pass
|
|
456
|
+
|
|
457
|
+
return findings
|
|
458
|
+
|
|
459
|
+
def _extract_references_from_content(self, file_path: Path, content: str) -> list[str]:
|
|
460
|
+
"""
|
|
461
|
+
Extract file references from content based on file type.
|
|
462
|
+
|
|
463
|
+
Args:
|
|
464
|
+
file_path: Path to the file
|
|
465
|
+
content: File content
|
|
466
|
+
|
|
467
|
+
Returns:
|
|
468
|
+
List of referenced file paths
|
|
469
|
+
"""
|
|
470
|
+
references = []
|
|
471
|
+
suffix = file_path.suffix.lower()
|
|
472
|
+
|
|
473
|
+
if suffix in (".md", ".markdown"):
|
|
474
|
+
markdown_links = _MARKDOWN_LINK_PATTERN.findall(content)
|
|
475
|
+
for _, link in markdown_links:
|
|
476
|
+
if not link.startswith(("http://", "https://", "ftp://", "#")):
|
|
477
|
+
references.append(link)
|
|
478
|
+
|
|
479
|
+
elif suffix == ".py":
|
|
480
|
+
import_patterns = _PYTHON_IMPORT_PATTERN.findall(content)
|
|
481
|
+
for imp in import_patterns:
|
|
482
|
+
if imp:
|
|
483
|
+
references.append(f"{imp}.py")
|
|
484
|
+
|
|
485
|
+
elif suffix in (".sh", ".bash"):
|
|
486
|
+
source_patterns = _BASH_SOURCE_PATTERN.findall(content)
|
|
487
|
+
references.extend(source_patterns)
|
|
488
|
+
|
|
489
|
+
return references
|
|
490
|
+
|
|
491
|
+
def _check_binary_files(self, skill: Skill) -> list[Finding]:
|
|
492
|
+
"""Check for binary files in skill package."""
|
|
493
|
+
findings = []
|
|
494
|
+
|
|
495
|
+
ASSET_EXTENSIONS = {
|
|
496
|
+
".ttf",
|
|
497
|
+
".otf",
|
|
498
|
+
".woff",
|
|
499
|
+
".woff2",
|
|
500
|
+
".eot",
|
|
501
|
+
".png",
|
|
502
|
+
".jpg",
|
|
503
|
+
".jpeg",
|
|
504
|
+
".gif",
|
|
505
|
+
".svg",
|
|
506
|
+
".webp",
|
|
507
|
+
".ico",
|
|
508
|
+
".bmp",
|
|
509
|
+
".tiff",
|
|
510
|
+
".tar.gz",
|
|
511
|
+
".tgz",
|
|
512
|
+
".zip",
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
for skill_file in skill.files:
|
|
516
|
+
if skill_file.file_type == "binary":
|
|
517
|
+
file_path_obj = Path(skill_file.relative_path)
|
|
518
|
+
ext = file_path_obj.suffix.lower()
|
|
519
|
+
if file_path_obj.name.endswith(".tar.gz"):
|
|
520
|
+
ext = ".tar.gz"
|
|
521
|
+
|
|
522
|
+
if ext in ASSET_EXTENSIONS:
|
|
523
|
+
continue
|
|
524
|
+
|
|
525
|
+
findings.append(
|
|
526
|
+
Finding(
|
|
527
|
+
id=self._generate_finding_id("BINARY_FILE_DETECTED", skill_file.relative_path),
|
|
528
|
+
rule_id="BINARY_FILE_DETECTED",
|
|
529
|
+
category=ThreatCategory.POLICY_VIOLATION,
|
|
530
|
+
severity=Severity.INFO,
|
|
531
|
+
title="Binary file detected in skill package",
|
|
532
|
+
description=f"Binary file found: {skill_file.relative_path}. "
|
|
533
|
+
f"Binary files cannot be inspected by static analysis. "
|
|
534
|
+
f"Consider using Python or Bash scripts for transparency.",
|
|
535
|
+
file_path=skill_file.relative_path,
|
|
536
|
+
remediation="Review binary file necessity. Replace with auditable scripts if possible.",
|
|
537
|
+
analyzer="static",
|
|
538
|
+
)
|
|
539
|
+
)
|
|
540
|
+
|
|
541
|
+
return findings
|
|
542
|
+
|
|
543
|
+
def _skill_uses_network(self, skill: Skill) -> bool:
|
|
544
|
+
"""Check if skill code uses network libraries for EXTERNAL communication."""
|
|
545
|
+
external_network_indicators = [
|
|
546
|
+
"import requests",
|
|
547
|
+
"from requests import",
|
|
548
|
+
"import urllib.request",
|
|
549
|
+
"from urllib.request import",
|
|
550
|
+
"import http.client",
|
|
551
|
+
"import httpx",
|
|
552
|
+
"import aiohttp",
|
|
553
|
+
]
|
|
554
|
+
|
|
555
|
+
socket_external_indicators = ["socket.connect", "socket.create_connection"]
|
|
556
|
+
socket_localhost_indicators = ["localhost", "127.0.0.1", "0.0.0.0"]
|
|
557
|
+
|
|
558
|
+
for skill_file in skill.get_scripts():
|
|
559
|
+
content = skill_file.read_content()
|
|
560
|
+
|
|
561
|
+
if any(indicator in content for indicator in external_network_indicators):
|
|
562
|
+
return True
|
|
563
|
+
|
|
564
|
+
if "import socket" in content:
|
|
565
|
+
has_socket_connect = any(ind in content for ind in socket_external_indicators)
|
|
566
|
+
is_localhost_only = any(ind in content for ind in socket_localhost_indicators)
|
|
567
|
+
|
|
568
|
+
if has_socket_connect and not is_localhost_only:
|
|
569
|
+
return True
|
|
570
|
+
|
|
571
|
+
return False
|
|
572
|
+
|
|
573
|
+
def _manifest_declares_network(self, skill: Skill) -> bool:
|
|
574
|
+
"""Check if manifest declares network usage."""
|
|
575
|
+
if skill.manifest.compatibility:
|
|
576
|
+
compatibility_lower = skill.manifest.compatibility.lower()
|
|
577
|
+
return "network" in compatibility_lower or "internet" in compatibility_lower
|
|
578
|
+
return False
|
|
579
|
+
|
|
580
|
+
def _check_description_mismatch(self, skill: Skill) -> bool:
|
|
581
|
+
"""Check for description/behavior mismatch (basic heuristic)."""
|
|
582
|
+
description = skill.description.lower()
|
|
583
|
+
|
|
584
|
+
simple_keywords = ["calculator", "format", "template", "style", "lint"]
|
|
585
|
+
if any(keyword in description for keyword in simple_keywords):
|
|
586
|
+
if self._skill_uses_network(skill):
|
|
587
|
+
return True
|
|
588
|
+
|
|
589
|
+
return False
|
|
590
|
+
|
|
591
|
+
def _check_allowed_tools_violations(self, skill: Skill) -> list[Finding]:
    """Check if code behavior violates allowed-tools restrictions.

    Compares the manifest's declared ``allowed-tools`` list against what the
    bundled scripts actually appear to do (read/write files, run bash,
    include Python, grep, glob) and emits one Finding per mismatch. Network
    usage is reported unconditionally since it is not tool-gated.

    Returns an empty list when the manifest declares no allowed-tools.
    """
    findings: list[Finding] = []

    if not skill.manifest.allowed_tools:
        return findings

    allowed_tools_lower = [tool.lower() for tool in skill.manifest.allowed_tools]
    declared = skill.manifest.allowed_tools

    def _add(rule_id: str, severity: Severity, title: str, description: str, remediation: str) -> None:
        # Shared constructor for every allowed-tools finding; all findings in
        # this check share the category / file_path / analyzer fields.
        findings.append(
            Finding(
                id=self._generate_finding_id(rule_id, skill.name),
                rule_id=rule_id,
                category=ThreatCategory.UNAUTHORIZED_TOOL_USE,
                severity=severity,
                title=title,
                description=description,
                file_path=None,
                remediation=remediation,
                analyzer="static",
            )
        )

    if "read" not in allowed_tools_lower and self._code_reads_files(skill):
        _add(
            "ALLOWED_TOOLS_READ_VIOLATION",
            Severity.MEDIUM,
            "Code reads files but Read tool not in allowed-tools",
            f"Skill restricts tools to {declared} but bundled scripts appear to read files from the filesystem.",
            "Add 'Read' to allowed-tools or remove file reading operations from scripts",
        )

    if "write" not in allowed_tools_lower and self._code_writes_files(skill):
        _add(
            "ALLOWED_TOOLS_WRITE_VIOLATION",
            Severity.HIGH,
            "Skill declares no Write tool but bundled scripts write files",
            f"Skill restricts tools to {declared} but bundled scripts appear to "
            f"write to the filesystem, which conflicts with a read-only tool declaration.",
            "Either add 'Write' to allowed-tools (if intentional) or remove filesystem writes from scripts",
        )

    if "bash" not in allowed_tools_lower and self._code_executes_bash(skill):
        _add(
            "ALLOWED_TOOLS_BASH_VIOLATION",
            Severity.HIGH,
            "Code executes bash but Bash tool not in allowed-tools",
            f"Skill restricts tools to {declared} but code executes bash commands",
            "Add 'Bash' to allowed-tools or remove bash execution from code",
        )

    if "python" not in allowed_tools_lower:
        # Presence of any bundled Python script counts as Python tool usage.
        python_scripts = [f for f in skill.files if f.file_type == "python" and f.relative_path != "SKILL.md"]
        if python_scripts:
            _add(
                "ALLOWED_TOOLS_PYTHON_VIOLATION",
                Severity.HIGH,
                "Python scripts present but Python tool not in allowed-tools",
                f"Skill restricts tools to {declared} but includes Python scripts",
                "Add 'Python' to allowed-tools or remove Python scripts",
            )

    if "grep" not in allowed_tools_lower and self._code_uses_grep(skill):
        _add(
            "ALLOWED_TOOLS_GREP_VIOLATION",
            Severity.LOW,
            "Code uses search/grep patterns but Grep tool not in allowed-tools",
            f"Skill restricts tools to {declared} but code uses regex search patterns",
            "Add 'Grep' to allowed-tools or remove regex search operations",
        )

    if "glob" not in allowed_tools_lower and self._code_uses_glob(skill):
        _add(
            "ALLOWED_TOOLS_GLOB_VIOLATION",
            Severity.LOW,
            "Code uses glob/file patterns but Glob tool not in allowed-tools",
            f"Skill restricts tools to {declared} but code uses glob patterns",
            "Add 'Glob' to allowed-tools or remove glob operations",
        )

    # Network access is not controlled by allowed-tools, so this check is
    # unconditional; the finding asks for documentation rather than removal.
    if self._code_uses_network(skill):
        _add(
            "ALLOWED_TOOLS_NETWORK_USAGE",
            Severity.MEDIUM,
            "Code makes network requests",
            "Skill code makes network requests. While not controlled by allowed-tools, "
            "network access should be documented and justified in the skill description.",
            "Document network usage in skill description or remove network operations if not needed",
        )

    return findings
|
|
723
|
+
def _code_reads_files(self, skill: Skill) -> bool:
    """Check if code contains file reading operations.

    Returns True as soon as any bundled script matches one of the
    module-level ``_READ_PATTERNS`` regexes.
    """
    for script in skill.get_scripts():
        source = script.read_content()
        if any(pattern.search(source) for pattern in _READ_PATTERNS):
            return True
    return False
|
|
732
|
+
def _code_writes_files(self, skill: Skill) -> bool:
    """Check if code contains file writing operations.

    Returns True as soon as any bundled script matches one of the
    module-level ``_WRITE_PATTERNS`` regexes.
    """
    for script in skill.get_scripts():
        source = script.read_content()
        if any(pattern.search(source) for pattern in _WRITE_PATTERNS):
            return True
    return False
|
|
741
|
+
def _code_executes_bash(self, skill: Skill) -> bool:
    """Check if code executes bash/shell commands.

    Triggers either on the presence of any bundled bash script, or on a
    substring match of common shell-execution APIs inside any script.
    """
    bash_indicators = (
        "subprocess.run",
        "subprocess.call",
        "subprocess.Popen",
        "subprocess.check_output",
        "os.system",
        "os.popen",
        "commands.getoutput",
        "shell=True",
    )

    # Any bundled bash file is itself shell execution.
    if any(f.file_type == "bash" for f in skill.files):
        return True

    for script in skill.get_scripts():
        source = script.read_content()
        if any(marker in source for marker in bash_indicators):
            return True
    return False
|
|
764
|
+
def _code_uses_grep(self, skill: Skill) -> bool:
    """Check if code uses regex search/grep patterns.

    Returns True as soon as any bundled script matches one of the
    module-level ``_GREP_PATTERNS`` regexes.
    """
    for script in skill.get_scripts():
        source = script.read_content()
        if any(pattern.search(source) for pattern in _GREP_PATTERNS):
            return True
    return False
|
|
773
|
+
def _code_uses_glob(self, skill: Skill) -> bool:
    """Check if code uses glob/file pattern matching.

    Returns True as soon as any bundled script matches one of the
    module-level ``_GLOB_PATTERNS`` regexes.
    """
    for script in skill.get_scripts():
        source = script.read_content()
        if any(pattern.search(source) for pattern in _GLOB_PATTERNS):
            return True
    return False
|
|
782
|
+
def _code_uses_network(self, skill: Skill) -> bool:
    """Check if code makes network requests.

    Substring scan over each bundled script for well-known HTTP-client and
    socket call sites; returns on the first hit.
    """
    network_indicators = (
        "requests.get",
        "requests.post",
        "requests.put",
        "requests.delete",
        "requests.patch",
        "urllib.request",
        "urllib.urlopen",
        "http.client",
        "httpx.",
        "aiohttp.",
        "socket.connect",
        "socket.create_connection",
    )

    for script in skill.get_scripts():
        source = script.read_content()
        if any(marker in source for marker in network_indicators):
            return True
    return False
|
|
805
|
+
def _scan_asset_files(self, skill: Skill) -> list[Finding]:
    """Scan files in assets/, templates/, and references/ directories for injection patterns.

    A file is treated as an asset when it lives under one of ASSET_DIRS,
    has a template extension, or is an "other"-typed text/config file.
    Each regex hit yields one Finding with the 1-based line number and a
    truncated snippet of the matching line.
    """
    findings: list[Finding] = []

    ASSET_DIRS = ["assets", "templates", "references", "data"]

    # (pattern, rule_id, severity, title) tuples. Recompiled per call; the
    # re module caches compiled patterns, so the cost is a cache lookup.
    ASSET_PATTERNS = [
        (
            re.compile(r"ignore\s+(all\s+)?previous\s+instructions?", re.IGNORECASE),
            "ASSET_PROMPT_INJECTION",
            Severity.HIGH,
            "Prompt injection pattern in asset file",
        ),
        (
            re.compile(r"disregard\s+(all\s+)?prior", re.IGNORECASE),
            "ASSET_PROMPT_INJECTION",
            Severity.HIGH,
            "Prompt override pattern in asset file",
        ),
        (
            re.compile(r"you\s+are\s+now\s+", re.IGNORECASE),
            "ASSET_PROMPT_INJECTION",
            Severity.MEDIUM,
            "Role reassignment pattern in asset file",
        ),
        (
            re.compile(r"https?://[^\s]+\.(tk|ml|ga|cf|gq)/", re.IGNORECASE),
            "ASSET_SUSPICIOUS_URL",
            Severity.MEDIUM,
            "Suspicious free domain URL in asset",
        ),
    ]

    def _category_for(rule_id: str) -> ThreatCategory:
        # Taxonomy bucket for a rule id (equivalent to the former nested
        # conditional expression; CODE/SCRIPT/BASE64 branches kept for
        # forward compatibility even though current rule ids don't use them).
        if "PROMPT" in rule_id:
            return ThreatCategory.PROMPT_INJECTION
        if "CODE" in rule_id or "SCRIPT" in rule_id:
            return ThreatCategory.COMMAND_INJECTION
        if "BASE64" in rule_id:
            return ThreatCategory.OBFUSCATION
        return ThreatCategory.POLICY_VIOLATION

    for skill_file in skill.files:
        path_parts = skill_file.relative_path.split("/")

        is_asset_file = (
            (len(path_parts) > 1 and path_parts[0] in ASSET_DIRS)
            or skill_file.relative_path.endswith((".template", ".tmpl", ".tpl"))
            or (
                skill_file.file_type == "other"
                and skill_file.relative_path.endswith((".txt", ".json", ".yaml", ".yml"))
            )
        )

        if not is_asset_file:
            continue

        content = skill_file.read_content()
        if not content:
            continue

        # Split once per file; previously the split ran once per match,
        # which was quadratic on files with many hits.
        content_lines = content.split("\n")

        for pattern, rule_id, severity, description in ASSET_PATTERNS:
            for match in pattern.finditer(content):
                line_number = content[: match.start()].count("\n") + 1
                line_content = content_lines[line_number - 1]

                findings.append(
                    Finding(
                        id=self._generate_finding_id(rule_id, f"{skill_file.relative_path}:{line_number}"),
                        rule_id=rule_id,
                        category=_category_for(rule_id),
                        severity=severity,
                        title=description,
                        description=f"Pattern '{match.group()[:50]}...' detected in asset file",
                        file_path=skill_file.relative_path,
                        line_number=line_number,
                        snippet=line_content[:100],
                        remediation="Review the asset file and remove any malicious or unnecessary dynamic patterns",
                        analyzer="static",
                    )
                )

    return findings
|
|
888
|
+
def _create_finding_from_match(self, rule: SecurityRule, match: dict[str, Any]) -> Finding:
    """Create a Finding object from a rule match, aligned with AITech taxonomy."""
    mapping = None
    try:
        # e.g. ThreatCategory.prompt_injection -> "PROMPT INJECTION"
        threat_name = rule.category.value.upper().replace("_", " ")
        mapping = ThreatMapping.get_threat_mapping("static", threat_name)
    except (ValueError, AttributeError):
        # No taxonomy entry for this category; AITech metadata stays None.
        pass

    def _mapped(key: str):
        return mapping.get(key) if mapping else None

    location = f"{match.get('file_path', 'unknown')}:{match.get('line_number', 0)}"

    return Finding(
        id=self._generate_finding_id(rule.id, location),
        rule_id=rule.id,
        category=rule.category,
        severity=rule.severity,
        title=rule.description,
        description=f"Pattern detected: {match.get('matched_text', 'N/A')}",
        file_path=match.get("file_path"),
        line_number=match.get("line_number"),
        snippet=match.get("line_content"),
        remediation=rule.remediation,
        analyzer="static",
        metadata={
            "matched_pattern": match.get("matched_pattern"),
            "matched_text": match.get("matched_text"),
            "aitech": _mapped("aitech"),
            "aitech_name": _mapped("aitech_name"),
            "scanner_category": _mapped("scanner_category"),
        },
    )
|
|
918
|
+
def _generate_finding_id(self, rule_id: str, context: str) -> str:
|
|
919
|
+
"""Generate a unique finding ID."""
|
|
920
|
+
combined = f"{rule_id}:{context}"
|
|
921
|
+
hash_obj = hashlib.sha256(combined.encode())
|
|
922
|
+
return f"{rule_id}_{hash_obj.hexdigest()[:10]}"
|
|
923
|
+
|
|
924
|
+
def _yara_scan(self, skill: Skill) -> list[Finding]:
    """Scan skill files with YARA rules.

    Scans the instruction body (as SKILL.md) first, then every bundled
    script. The ``skill_discovery_abuse`` rule is suppressed for scripts.
    """
    findings: list[Finding] = []

    # Instruction body (SKILL.md) first.
    for yara_match in self.yara_scanner.scan_content(skill.instruction_body, "SKILL.md"):
        findings.extend(self._create_findings_from_yara_match(yara_match, skill))

    # Then each bundled script, skipping empty content.
    for skill_file in skill.get_scripts():
        content = skill_file.read_content()
        if not content:
            continue
        for yara_match in self.yara_scanner.scan_content(content, skill_file.relative_path):
            if yara_match.get("rule_name", "") == "skill_discovery_abuse":
                # This rule is noise when it fires on script content.
                continue
            findings.extend(self._create_findings_from_yara_match(yara_match, skill, content))

    return findings
|
|
944
|
+
def _create_findings_from_yara_match(
    self, match: dict[str, Any], skill: Skill, file_content: str | None = None
) -> list[Finding]:
    """Convert YARA match to Finding objects.

    One Finding is produced per matched string, except for two
    false-positive suppressions:
      * ``code_execution`` hits are dropped when a known-safe command name
        appears in the matched line, the matched data, or a ±4-line context
        window around the match.
      * ``system_manipulation`` hits for ``rm -rf``/``rm -r`` are dropped
        when every rm target contains a known build/cleanup directory name.

    Args:
        match: One entry from the YARA scanner; keys used here are
            "rule_name", "namespace", "file_path", "meta", "strings".
        skill: The skill being scanned (unused directly in this body).
        file_content: Full text of the scanned file, when available; used
            only to build the context window for code_execution hits.

    Returns:
        List of Finding objects for the non-suppressed string matches.
    """
    findings = []

    rule_name = match["rule_name"]
    namespace = match["namespace"]
    file_path = match["file_path"]
    # NOTE(review): meta is read from match["meta"]["meta"] — assumes the
    # scanner nests the rule metadata one level deep; verify against
    # yara_scanner's output shape.
    meta = match["meta"].get("meta", {})

    category, severity = self._map_yara_rule_to_threat(rule_name, meta)

    # Command names treated as benign for code_execution matches
    # (document converters, archivers, common text tools, git).
    SAFE_COMMANDS = {
        "soffice",
        "pandoc",
        "wkhtmltopdf",
        "convert",
        "gs",
        "pdftotext",
        "pdfinfo",
        "pdftoppm",
        "pdftohtml",
        "tesseract",
        "ffmpeg",
        "ffprobe",
        "zip",
        "unzip",
        "tar",
        "gzip",
        "gunzip",
        "bzip2",
        "bunzip2",
        "xz",
        "unxz",
        "7z",
        "7za",
        "gtimeout",
        "timeout",
        "grep",
        "head",
        "tail",
        "sort",
        "uniq",
        "wc",
        "file",
        "git",
    }

    # Directory/file names considered safe targets for `rm -r`/`rm -rf`
    # (typical build artifacts and caches).
    SAFE_CLEANUP_DIRS = {
        "dist",
        "build",
        "tmp",
        "temp",
        ".tmp",
        ".temp",
        "bundle.html",
        "bundle.js",
        "bundle.css",
        "node_modules",
        ".next",
        ".nuxt",
        ".cache",
    }

    for string_match in match["strings"]:
        if rule_name == "code_execution":
            line_content = string_match.get("line_content", "").lower()
            matched_data = string_match.get("matched_data", "").lower()

            # Build a lowercase ±4-line context window around the match,
            # when the full file content is available.
            context_content = ""
            if file_content:
                line_num = string_match.get("line_number", 0)
                if line_num > 0:
                    lines = file_content.split("\n")
                    start_line = max(0, line_num - 4)
                    end_line = min(len(lines), line_num + 5)
                    context_content = "\n".join(lines[start_line:end_line]).lower()

            # NOTE(review): plain substring match — short names like "gs",
            # "wc", or "tar" can match inside unrelated words (e.g. "args",
            # "start"), silently suppressing real findings. Consider
            # word-boundary matching.
            is_safe_command = any(
                safe_cmd in line_content or safe_cmd in matched_data or safe_cmd in context_content
                for safe_cmd in SAFE_COMMANDS
            )

            if is_safe_command:
                continue

        if rule_name == "system_manipulation":
            line_content = string_match.get("line_content", "").lower()

            if "rm -rf" in line_content or "rm -r" in line_content:
                # _RM_TARGET_PATTERN is a module-level regex extracting rm
                # targets from the line; suppress only if EVERY target
                # contains a safe cleanup name (substring check).
                rm_targets = _RM_TARGET_PATTERN.findall(line_content)
                if rm_targets:
                    all_safe = all(
                        any(safe_dir in target for safe_dir in SAFE_CLEANUP_DIRS) for target in rm_targets
                    )
                    if all_safe:
                        continue

        finding_id = self._generate_finding_id(f"YARA_{rule_name}", f"{file_path}:{string_match['line_number']}")

        description = meta.get("description", f"YARA rule {rule_name} matched")
        threat_type = meta.get("threat_type", "SECURITY THREAT")

        findings.append(
            Finding(
                id=finding_id,
                rule_id=f"YARA_{rule_name}",
                category=category,
                severity=severity,
                title=f"{threat_type} detected by YARA",
                description=f"{description}: {string_match['matched_data'][:100]}",
                file_path=file_path,
                line_number=string_match["line_number"],
                snippet=string_match["line_content"],
                remediation=f"Review and remove {threat_type.lower()} pattern",
                analyzer="static",
                metadata={
                    "yara_rule": rule_name,
                    "yara_namespace": namespace,
                    "matched_string": string_match["identifier"],
                    "threat_type": threat_type,
                },
            )
        )

    return findings
|
|
1072
|
+
def _map_yara_rule_to_threat(self, rule_name: str, meta: dict[str, Any]) -> tuple:
    """Map YARA rule to ThreatCategory and Severity.

    Category comes from the rule meta's ``threat_type``; severity is driven
    by ``classification`` ("harmful" escalates by threat keyword, anything
    else is LOW). ``rule_name`` is accepted for interface compatibility.
    """
    threat_type = meta.get("threat_type", "").upper()
    classification = meta.get("classification", "harmful")

    category_map = {
        "PROMPT INJECTION": ThreatCategory.PROMPT_INJECTION,
        "INJECTION ATTACK": ThreatCategory.COMMAND_INJECTION,
        "COMMAND INJECTION": ThreatCategory.COMMAND_INJECTION,
        "CREDENTIAL HARVESTING": ThreatCategory.HARDCODED_SECRETS,
        "DATA EXFILTRATION": ThreatCategory.DATA_EXFILTRATION,
        "SYSTEM MANIPULATION": ThreatCategory.UNAUTHORIZED_TOOL_USE,
        "CODE EXECUTION": ThreatCategory.COMMAND_INJECTION,
        "SQL INJECTION": ThreatCategory.COMMAND_INJECTION,
        "SKILL DISCOVERY ABUSE": ThreatCategory.SKILL_DISCOVERY_ABUSE,
        "TRANSITIVE TRUST ABUSE": ThreatCategory.TRANSITIVE_TRUST_ABUSE,
        "AUTONOMY ABUSE": ThreatCategory.AUTONOMY_ABUSE,
        "TOOL CHAINING ABUSE": ThreatCategory.TOOL_CHAINING_ABUSE,
        "UNICODE STEGANOGRAPHY": ThreatCategory.UNICODE_STEGANOGRAPHY,
    }

    category = category_map.get(threat_type, ThreatCategory.POLICY_VIOLATION)

    # Non-harmful classifications are informational.
    if classification != "harmful":
        return category, Severity.LOW
    if "INJECTION" in threat_type or "CREDENTIAL" in threat_type:
        return category, Severity.CRITICAL
    if "EXFILTRATION" in threat_type or "MANIPULATION" in threat_type:
        return category, Severity.HIGH
    return category, Severity.MEDIUM