cisco-ai-skill-scanner 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
- skillanalyzer/__init__.py +45 -0
- skillanalyzer/_version.py +34 -0
- skillanalyzer/api/__init__.py +25 -0
- skillanalyzer/api/api.py +34 -0
- skillanalyzer/api/api_cli.py +78 -0
- skillanalyzer/api/api_server.py +634 -0
- skillanalyzer/api/router.py +527 -0
- skillanalyzer/cli/__init__.py +25 -0
- skillanalyzer/cli/cli.py +816 -0
- skillanalyzer/config/__init__.py +26 -0
- skillanalyzer/config/config.py +149 -0
- skillanalyzer/config/config_parser.py +122 -0
- skillanalyzer/config/constants.py +85 -0
- skillanalyzer/core/__init__.py +24 -0
- skillanalyzer/core/analyzers/__init__.py +75 -0
- skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
- skillanalyzer/core/analyzers/base.py +53 -0
- skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
- skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
- skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
- skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
- skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
- skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
- skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
- skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
- skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
- skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
- skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
- skillanalyzer/core/analyzers/static.py +1105 -0
- skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
- skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
- skillanalyzer/core/exceptions.py +77 -0
- skillanalyzer/core/loader.py +377 -0
- skillanalyzer/core/models.py +300 -0
- skillanalyzer/core/reporters/__init__.py +26 -0
- skillanalyzer/core/reporters/json_reporter.py +65 -0
- skillanalyzer/core/reporters/markdown_reporter.py +209 -0
- skillanalyzer/core/reporters/sarif_reporter.py +246 -0
- skillanalyzer/core/reporters/table_reporter.py +195 -0
- skillanalyzer/core/rules/__init__.py +19 -0
- skillanalyzer/core/rules/patterns.py +165 -0
- skillanalyzer/core/rules/yara_scanner.py +157 -0
- skillanalyzer/core/scanner.py +437 -0
- skillanalyzer/core/static_analysis/__init__.py +27 -0
- skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
- skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
- skillanalyzer/core/static_analysis/context_extractor.py +742 -0
- skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
- skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
- skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
- skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
- skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
- skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
- skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
- skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
- skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
- skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
- skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
- skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
- skillanalyzer/core/static_analysis/types/__init__.py +36 -0
- skillanalyzer/data/__init__.py +30 -0
- skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
- skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
- skillanalyzer/data/prompts/llm_response_schema.json +71 -0
- skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
- skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
- skillanalyzer/data/prompts/unified_response_schema.md +97 -0
- skillanalyzer/data/rules/signatures.yaml +440 -0
- skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
- skillanalyzer/data/yara_rules/code_execution.yara +61 -0
- skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
- skillanalyzer/data/yara_rules/command_injection.yara +54 -0
- skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
- skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
- skillanalyzer/data/yara_rules/script_injection.yara +83 -0
- skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
- skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
- skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
- skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
- skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
- skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
- skillanalyzer/hooks/__init__.py +21 -0
- skillanalyzer/hooks/pre_commit.py +450 -0
- skillanalyzer/threats/__init__.py +25 -0
- skillanalyzer/threats/threats.py +480 -0
- skillanalyzer/utils/__init__.py +28 -0
- skillanalyzer/utils/command_utils.py +129 -0
- skillanalyzer/utils/di_container.py +154 -0
- skillanalyzer/utils/file_utils.py +86 -0
- skillanalyzer/utils/logging_config.py +96 -0
- skillanalyzer/utils/logging_utils.py +71 -0
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Cross-skill analyzer for detecting coordinated attacks across multiple skills.
|
|
19
|
+
|
|
20
|
+
This analyzer looks for patterns that suggest multiple skills are working together
|
|
21
|
+
to perform malicious activities, such as:
|
|
22
|
+
- Data relay patterns (one skill collects data, another exfiltrates)
|
|
23
|
+
- Shared external URLs across skills
|
|
24
|
+
- Complementary trigger descriptions
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import re
|
|
28
|
+
|
|
29
|
+
from ..models import Finding, Severity, Skill, ThreatCategory
|
|
30
|
+
from .base import BaseAnalyzer
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class CrossSkillAnalyzer(BaseAnalyzer):
    """Detect coordinated attack patterns that span several skills.

    Unlike per-skill analyzers, this one is meant to be fed a whole
    collection of skills via ``analyze_skill_set()``; the suspicious
    signals it looks for only emerge when skills are examined in
    relation to each other.
    """

    def __init__(self) -> None:
        """Set up the analyzer with an empty working set of skills."""
        super().__init__("cross_skill_analyzer")
        # Populated by analyze_skill_set() and read by the _detect_* helpers.
        self._skills: list[Skill] = []
|
|
46
|
+
|
|
47
|
+
def analyze(self, skill: Skill) -> list[Finding]:
    """Return no findings: single-skill analysis is a no-op here.

    Cross-skill detection needs the whole collection to correlate;
    call analyze_skill_set() to obtain findings from this analyzer.
    """
    return []
|
|
55
|
+
|
|
56
|
+
def analyze_skill_set(self, skills: list[Skill]) -> list[Finding]:
    """Run every cross-skill detection over the given collection.

    Args:
        skills: Skills to examine in relation to one another.

    Returns:
        Findings from all cross-skill detections; empty when fewer
        than two skills are supplied (nothing to correlate).
    """
    if len(skills) < 2:
        return []

    self._skills = skills

    # Detections run in a fixed order; each contributes independently.
    detections = (
        self._detect_data_relay_pattern,          # 1: collect-then-exfiltrate chains
        self._detect_shared_external_urls,        # 2: same external domain in several skills
        self._detect_complementary_triggers,      # 3: descriptions that dovetail
        self._detect_shared_suspicious_patterns,  # 4: shared obfuscation fingerprints
    )
    findings: list[Finding] = []
    for detect in detections:
        findings.extend(detect())
    return findings
|
|
85
|
+
|
|
86
|
+
def _detect_data_relay_pattern(self) -> list[Finding]:
|
|
87
|
+
"""
|
|
88
|
+
Detect data relay patterns where one skill collects data and another exfiltrates.
|
|
89
|
+
|
|
90
|
+
Pattern: Skill A reads credentials/sensitive data, Skill B sends to network.
|
|
91
|
+
"""
|
|
92
|
+
findings = []
|
|
93
|
+
|
|
94
|
+
# Categorize skills by behavior
|
|
95
|
+
collectors: list[tuple[Skill, set[str]]] = [] # Skills that read sensitive data
|
|
96
|
+
exfiltrators: list[tuple[Skill, set[str]]] = [] # Skills with network output
|
|
97
|
+
|
|
98
|
+
# Patterns that indicate data collection
|
|
99
|
+
COLLECTION_PATTERNS = [
|
|
100
|
+
r"credential",
|
|
101
|
+
r"password",
|
|
102
|
+
r"secret",
|
|
103
|
+
r"api[_-]?key",
|
|
104
|
+
r"token",
|
|
105
|
+
r"\.env",
|
|
106
|
+
r"config",
|
|
107
|
+
r"ssh",
|
|
108
|
+
r"private",
|
|
109
|
+
r"\.pem",
|
|
110
|
+
r"~/.ssh",
|
|
111
|
+
r"/etc/passwd",
|
|
112
|
+
r"/etc/shadow",
|
|
113
|
+
r"keychain",
|
|
114
|
+
r"wallet",
|
|
115
|
+
r"cookie",
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
# Patterns that indicate network exfiltration
|
|
119
|
+
EXFIL_PATTERNS = [
|
|
120
|
+
r"requests\.(post|put)",
|
|
121
|
+
r"urllib\.request",
|
|
122
|
+
r"httpx\.(post|put)",
|
|
123
|
+
r"socket\.send",
|
|
124
|
+
r"aiohttp.*post",
|
|
125
|
+
r"webhook",
|
|
126
|
+
r"discord\.com/api/webhooks",
|
|
127
|
+
r"ngrok",
|
|
128
|
+
r"localhost\.run",
|
|
129
|
+
]
|
|
130
|
+
|
|
131
|
+
for skill in self._skills:
|
|
132
|
+
skill_content = self._get_skill_content(skill)
|
|
133
|
+
|
|
134
|
+
# Check for collection patterns
|
|
135
|
+
collection_hits = set()
|
|
136
|
+
for pattern in COLLECTION_PATTERNS:
|
|
137
|
+
if re.search(pattern, skill_content, re.IGNORECASE):
|
|
138
|
+
collection_hits.add(pattern)
|
|
139
|
+
if collection_hits:
|
|
140
|
+
collectors.append((skill, collection_hits))
|
|
141
|
+
|
|
142
|
+
# Check for exfiltration patterns
|
|
143
|
+
exfil_hits = set()
|
|
144
|
+
for pattern in EXFIL_PATTERNS:
|
|
145
|
+
if re.search(pattern, skill_content, re.IGNORECASE):
|
|
146
|
+
exfil_hits.add(pattern)
|
|
147
|
+
if exfil_hits:
|
|
148
|
+
exfiltrators.append((skill, exfil_hits))
|
|
149
|
+
|
|
150
|
+
# Flag if we have both collectors and exfiltrators
|
|
151
|
+
if collectors and exfiltrators:
|
|
152
|
+
collector_names = [s.name for s, _ in collectors]
|
|
153
|
+
exfil_names = [s.name for s, _ in exfiltrators]
|
|
154
|
+
|
|
155
|
+
# Only flag if they are different skills
|
|
156
|
+
if set(collector_names) != set(exfil_names):
|
|
157
|
+
findings.append(
|
|
158
|
+
Finding(
|
|
159
|
+
id=f"CROSS_SKILL_RELAY_{hash(tuple(collector_names + exfil_names)) & 0xFFFFFFFF:08x}",
|
|
160
|
+
rule_id="CROSS_SKILL_DATA_RELAY",
|
|
161
|
+
category=ThreatCategory.DATA_EXFILTRATION,
|
|
162
|
+
severity=Severity.HIGH,
|
|
163
|
+
title="Potential data relay attack pattern detected",
|
|
164
|
+
description=(
|
|
165
|
+
f"Skills appear to form a data relay chain. "
|
|
166
|
+
f"Collectors ({', '.join(collector_names)}) access sensitive data while "
|
|
167
|
+
f"exfiltrators ({', '.join(exfil_names)}) send data to external destinations. "
|
|
168
|
+
f"This pattern may indicate a coordinated attack."
|
|
169
|
+
),
|
|
170
|
+
file_path="(cross-skill analysis)",
|
|
171
|
+
remediation=(
|
|
172
|
+
"Review these skills together to ensure they are not collaborating "
|
|
173
|
+
"to exfiltrate sensitive data. Consider disabling one or both skills."
|
|
174
|
+
),
|
|
175
|
+
analyzer="cross_skill",
|
|
176
|
+
metadata={
|
|
177
|
+
"collectors": collector_names,
|
|
178
|
+
"exfiltrators": exfil_names,
|
|
179
|
+
},
|
|
180
|
+
)
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
return findings
|
|
184
|
+
|
|
185
|
+
def _detect_shared_external_urls(self) -> list[Finding]:
|
|
186
|
+
"""
|
|
187
|
+
Detect skills that reference the same external URLs.
|
|
188
|
+
|
|
189
|
+
Multiple skills pointing to the same external resource may indicate
|
|
190
|
+
coordinated command-and-control or exfiltration.
|
|
191
|
+
"""
|
|
192
|
+
findings = []
|
|
193
|
+
|
|
194
|
+
# Extract URLs from each skill
|
|
195
|
+
skill_urls: dict[str, list[str]] = {} # URL -> list of skill names
|
|
196
|
+
|
|
197
|
+
for skill in self._skills:
|
|
198
|
+
content = self._get_skill_content(skill)
|
|
199
|
+
urls = self._extract_urls(content)
|
|
200
|
+
|
|
201
|
+
for url in urls:
|
|
202
|
+
# Normalize URL (remove path, keep domain)
|
|
203
|
+
domain = self._extract_domain(url)
|
|
204
|
+
if domain and not self._is_common_domain(domain):
|
|
205
|
+
if domain not in skill_urls:
|
|
206
|
+
skill_urls[domain] = []
|
|
207
|
+
if skill.name not in skill_urls[domain]:
|
|
208
|
+
skill_urls[domain].append(skill.name)
|
|
209
|
+
|
|
210
|
+
# Flag domains referenced by multiple skills
|
|
211
|
+
for domain, skill_names in skill_urls.items():
|
|
212
|
+
if len(skill_names) >= 2:
|
|
213
|
+
findings.append(
|
|
214
|
+
Finding(
|
|
215
|
+
id=f"CROSS_SKILL_URL_{hash(domain) & 0xFFFFFFFF:08x}",
|
|
216
|
+
rule_id="CROSS_SKILL_SHARED_URL",
|
|
217
|
+
category=ThreatCategory.DATA_EXFILTRATION,
|
|
218
|
+
severity=Severity.MEDIUM,
|
|
219
|
+
title="Multiple skills reference the same external domain",
|
|
220
|
+
description=(
|
|
221
|
+
f"Domain '{domain}' is referenced by {len(skill_names)} skills: "
|
|
222
|
+
f"{', '.join(skill_names)}. Multiple skills pointing to the same "
|
|
223
|
+
f"external resource may indicate coordinated C2 or exfiltration."
|
|
224
|
+
),
|
|
225
|
+
file_path="(cross-skill analysis)",
|
|
226
|
+
remediation=(
|
|
227
|
+
"Review why multiple skills reference this domain and ensure "
|
|
228
|
+
"it is a legitimate, trusted resource."
|
|
229
|
+
),
|
|
230
|
+
analyzer="cross_skill",
|
|
231
|
+
metadata={
|
|
232
|
+
"domain": domain,
|
|
233
|
+
"skills": skill_names,
|
|
234
|
+
},
|
|
235
|
+
)
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
return findings
|
|
239
|
+
|
|
240
|
+
def _detect_complementary_triggers(self) -> list[Finding]:
|
|
241
|
+
"""
|
|
242
|
+
Detect skills with complementary trigger descriptions.
|
|
243
|
+
|
|
244
|
+
Pattern: One skill designed to collect, another to exfiltrate,
|
|
245
|
+
with descriptions that suggest they work together.
|
|
246
|
+
"""
|
|
247
|
+
findings = []
|
|
248
|
+
|
|
249
|
+
# Keywords that suggest data collection
|
|
250
|
+
COLLECTION_KEYWORDS = {
|
|
251
|
+
"gather",
|
|
252
|
+
"collect",
|
|
253
|
+
"read",
|
|
254
|
+
"scan",
|
|
255
|
+
"find",
|
|
256
|
+
"search",
|
|
257
|
+
"extract",
|
|
258
|
+
"parse",
|
|
259
|
+
"load",
|
|
260
|
+
"get",
|
|
261
|
+
"fetch",
|
|
262
|
+
"retrieve",
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
# Keywords that suggest data sending
|
|
266
|
+
SENDING_KEYWORDS = {
|
|
267
|
+
"send",
|
|
268
|
+
"upload",
|
|
269
|
+
"post",
|
|
270
|
+
"submit",
|
|
271
|
+
"transfer",
|
|
272
|
+
"sync",
|
|
273
|
+
"backup",
|
|
274
|
+
"export",
|
|
275
|
+
"share",
|
|
276
|
+
"publish",
|
|
277
|
+
"notify",
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
collectors = []
|
|
281
|
+
senders = []
|
|
282
|
+
|
|
283
|
+
for skill in self._skills:
|
|
284
|
+
desc_lower = skill.description.lower()
|
|
285
|
+
desc_words = set(re.findall(r"\b[a-z]+\b", desc_lower))
|
|
286
|
+
|
|
287
|
+
if desc_words & COLLECTION_KEYWORDS:
|
|
288
|
+
collectors.append(skill)
|
|
289
|
+
if desc_words & SENDING_KEYWORDS:
|
|
290
|
+
senders.append(skill)
|
|
291
|
+
|
|
292
|
+
# Flag if we have complementary skills
|
|
293
|
+
if collectors and senders:
|
|
294
|
+
# Check for suspicious combinations
|
|
295
|
+
for collector in collectors:
|
|
296
|
+
for sender in senders:
|
|
297
|
+
if collector.name != sender.name:
|
|
298
|
+
# Check if they might work together
|
|
299
|
+
coll_words = set(re.findall(r"\b[a-z]+\b", collector.description.lower()))
|
|
300
|
+
send_words = set(re.findall(r"\b[a-z]+\b", sender.description.lower()))
|
|
301
|
+
|
|
302
|
+
# Look for shared context words (excluding stop words and action words)
|
|
303
|
+
EXCLUDE_WORDS = (
|
|
304
|
+
COLLECTION_KEYWORDS
|
|
305
|
+
| SENDING_KEYWORDS
|
|
306
|
+
| {
|
|
307
|
+
"the",
|
|
308
|
+
"a",
|
|
309
|
+
"an",
|
|
310
|
+
"is",
|
|
311
|
+
"are",
|
|
312
|
+
"to",
|
|
313
|
+
"for",
|
|
314
|
+
"and",
|
|
315
|
+
"or",
|
|
316
|
+
"in",
|
|
317
|
+
"with",
|
|
318
|
+
}
|
|
319
|
+
)
|
|
320
|
+
shared_context = (coll_words & send_words) - EXCLUDE_WORDS
|
|
321
|
+
|
|
322
|
+
if len(shared_context) >= 2:
|
|
323
|
+
findings.append(
|
|
324
|
+
Finding(
|
|
325
|
+
id=f"CROSS_SKILL_COMPLEMENTARY_{hash(collector.name + sender.name) & 0xFFFFFFFF:08x}",
|
|
326
|
+
rule_id="CROSS_SKILL_COMPLEMENTARY_TRIGGERS",
|
|
327
|
+
category=ThreatCategory.SOCIAL_ENGINEERING,
|
|
328
|
+
severity=Severity.LOW,
|
|
329
|
+
title="Skills have complementary descriptions",
|
|
330
|
+
description=(
|
|
331
|
+
f"Skill '{collector.name}' (collector) and '{sender.name}' (sender) "
|
|
332
|
+
f"have complementary descriptions with shared context: {', '.join(shared_context)}. "
|
|
333
|
+
f"This may be intentional design or could indicate coordinated behavior."
|
|
334
|
+
),
|
|
335
|
+
file_path="(cross-skill analysis)",
|
|
336
|
+
remediation="Review these skills to ensure they are not designed to work together maliciously",
|
|
337
|
+
analyzer="cross_skill",
|
|
338
|
+
metadata={
|
|
339
|
+
"collector": collector.name,
|
|
340
|
+
"sender": sender.name,
|
|
341
|
+
"shared_context": list(shared_context),
|
|
342
|
+
},
|
|
343
|
+
)
|
|
344
|
+
)
|
|
345
|
+
|
|
346
|
+
return findings
|
|
347
|
+
|
|
348
|
+
def _detect_shared_suspicious_patterns(self) -> list[Finding]:
|
|
349
|
+
"""
|
|
350
|
+
Detect skills that share suspicious code patterns.
|
|
351
|
+
|
|
352
|
+
Similar obfuscation or encoding across skills may indicate
|
|
353
|
+
they came from the same malicious source.
|
|
354
|
+
"""
|
|
355
|
+
findings = []
|
|
356
|
+
|
|
357
|
+
# Extract suspicious patterns from each skill
|
|
358
|
+
SUSPICIOUS_PATTERNS = [
|
|
359
|
+
(r"base64\.b64decode", "base64_decode"),
|
|
360
|
+
(r"exec\s*\(", "exec_call"),
|
|
361
|
+
(r"eval\s*\(", "eval_call"),
|
|
362
|
+
(r"\\x[0-9a-fA-F]{2}", "hex_escape"),
|
|
363
|
+
(r"chr\([0-9]+\)", "chr_call"),
|
|
364
|
+
(r"getattr\s*\([^)]+,\s*['\"][^'\"]+['\"]\s*\)", "dynamic_getattr"),
|
|
365
|
+
]
|
|
366
|
+
|
|
367
|
+
skill_patterns: dict[str, list[str]] = {} # pattern -> list of skill names
|
|
368
|
+
|
|
369
|
+
for skill in self._skills:
|
|
370
|
+
content = self._get_skill_content(skill)
|
|
371
|
+
|
|
372
|
+
for pattern, name in SUSPICIOUS_PATTERNS:
|
|
373
|
+
if re.search(pattern, content):
|
|
374
|
+
if name not in skill_patterns:
|
|
375
|
+
skill_patterns[name] = []
|
|
376
|
+
if skill.name not in skill_patterns[name]:
|
|
377
|
+
skill_patterns[name].append(skill.name)
|
|
378
|
+
|
|
379
|
+
# Flag patterns shared by multiple skills
|
|
380
|
+
for pattern_name, skill_names in skill_patterns.items():
|
|
381
|
+
if len(skill_names) >= 2:
|
|
382
|
+
findings.append(
|
|
383
|
+
Finding(
|
|
384
|
+
id=f"CROSS_SKILL_PATTERN_{hash(pattern_name + str(skill_names)) & 0xFFFFFFFF:08x}",
|
|
385
|
+
rule_id="CROSS_SKILL_SHARED_PATTERN",
|
|
386
|
+
category=ThreatCategory.OBFUSCATION,
|
|
387
|
+
severity=Severity.MEDIUM,
|
|
388
|
+
title="Multiple skills share suspicious code pattern",
|
|
389
|
+
description=(
|
|
390
|
+
f"Pattern '{pattern_name}' found in {len(skill_names)} skills: "
|
|
391
|
+
f"{', '.join(skill_names)}. Shared suspicious patterns may indicate "
|
|
392
|
+
f"skills from the same malicious source."
|
|
393
|
+
),
|
|
394
|
+
file_path="(cross-skill analysis)",
|
|
395
|
+
remediation=(
|
|
396
|
+
"Review these skills carefully - shared obfuscation or encoding "
|
|
397
|
+
"patterns often indicate malicious intent."
|
|
398
|
+
),
|
|
399
|
+
analyzer="cross_skill",
|
|
400
|
+
metadata={
|
|
401
|
+
"pattern": pattern_name,
|
|
402
|
+
"skills": skill_names,
|
|
403
|
+
},
|
|
404
|
+
)
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
return findings
|
|
408
|
+
|
|
409
|
+
def _get_skill_content(self, skill: Skill) -> str:
|
|
410
|
+
"""Get all content from a skill as a single string."""
|
|
411
|
+
content_parts = [skill.description, skill.instruction_body]
|
|
412
|
+
|
|
413
|
+
for skill_file in skill.files:
|
|
414
|
+
try:
|
|
415
|
+
file_content = skill_file.read_content()
|
|
416
|
+
if file_content:
|
|
417
|
+
content_parts.append(file_content)
|
|
418
|
+
except Exception:
|
|
419
|
+
pass
|
|
420
|
+
|
|
421
|
+
return "\n".join(content_parts)
|
|
422
|
+
|
|
423
|
+
def _extract_urls(self, content: str) -> list[str]:
|
|
424
|
+
"""Extract URLs from content."""
|
|
425
|
+
url_pattern = r'https?://[^\s<>"\')\]]+[^\s<>"\')\]\.,]'
|
|
426
|
+
return re.findall(url_pattern, content)
|
|
427
|
+
|
|
428
|
+
def _extract_domain(self, url: str) -> str:
|
|
429
|
+
"""Extract domain from URL."""
|
|
430
|
+
match = re.match(r"https?://([^/]+)", url)
|
|
431
|
+
if match:
|
|
432
|
+
return match.group(1).lower()
|
|
433
|
+
return ""
|
|
434
|
+
|
|
435
|
+
def _is_common_domain(self, domain: str) -> bool:
|
|
436
|
+
"""Check if domain is a common/trusted domain."""
|
|
437
|
+
COMMON_DOMAINS = {
|
|
438
|
+
# Code hosting / package registries
|
|
439
|
+
"github.com",
|
|
440
|
+
"githubusercontent.com",
|
|
441
|
+
"gitlab.com",
|
|
442
|
+
"pypi.org",
|
|
443
|
+
"npmjs.com",
|
|
444
|
+
"python.org",
|
|
445
|
+
"crates.io",
|
|
446
|
+
"rubygems.org",
|
|
447
|
+
"packagist.org",
|
|
448
|
+
# AI providers
|
|
449
|
+
"anthropic.com",
|
|
450
|
+
"openai.com",
|
|
451
|
+
"claude.com",
|
|
452
|
+
# Cloud providers
|
|
453
|
+
"google.com",
|
|
454
|
+
"googleapis.com",
|
|
455
|
+
"microsoft.com",
|
|
456
|
+
"azure.com",
|
|
457
|
+
"amazon.com",
|
|
458
|
+
"amazonaws.com",
|
|
459
|
+
"aws.amazon.com",
|
|
460
|
+
# Documentation / references
|
|
461
|
+
"stackoverflow.com",
|
|
462
|
+
"docs.python.org",
|
|
463
|
+
"developer.mozilla.org",
|
|
464
|
+
"mdn.io",
|
|
465
|
+
# Standards organizations & licensing
|
|
466
|
+
"apache.org",
|
|
467
|
+
"www.apache.org", # Apache license
|
|
468
|
+
"opensource.org", # OSI licenses
|
|
469
|
+
"creativecommons.org", # CC licenses
|
|
470
|
+
"w3.org",
|
|
471
|
+
"www.w3.org", # W3C standards
|
|
472
|
+
"ietf.org", # IETF standards
|
|
473
|
+
# XML/Document standards (used by Office docs)
|
|
474
|
+
"schemas.openxmlformats.org",
|
|
475
|
+
"schemas.microsoft.com",
|
|
476
|
+
"purl.org", # Persistent URLs for standards
|
|
477
|
+
"dublincore.org", # Metadata standard
|
|
478
|
+
"xmlsoft.org", # libxml
|
|
479
|
+
# CDNs (common for web templates)
|
|
480
|
+
"cdnjs.cloudflare.com",
|
|
481
|
+
"cdn.jsdelivr.net",
|
|
482
|
+
"unpkg.com",
|
|
483
|
+
"ajax.googleapis.com",
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
# Check if domain or parent domain is common
|
|
487
|
+
for common in COMMON_DOMAINS:
|
|
488
|
+
if domain == common or domain.endswith("." + common):
|
|
489
|
+
return True
|
|
490
|
+
return False
|