cisco-ai-skill-scanner 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
- skillanalyzer/__init__.py +45 -0
- skillanalyzer/_version.py +34 -0
- skillanalyzer/api/__init__.py +25 -0
- skillanalyzer/api/api.py +34 -0
- skillanalyzer/api/api_cli.py +78 -0
- skillanalyzer/api/api_server.py +634 -0
- skillanalyzer/api/router.py +527 -0
- skillanalyzer/cli/__init__.py +25 -0
- skillanalyzer/cli/cli.py +816 -0
- skillanalyzer/config/__init__.py +26 -0
- skillanalyzer/config/config.py +149 -0
- skillanalyzer/config/config_parser.py +122 -0
- skillanalyzer/config/constants.py +85 -0
- skillanalyzer/core/__init__.py +24 -0
- skillanalyzer/core/analyzers/__init__.py +75 -0
- skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
- skillanalyzer/core/analyzers/base.py +53 -0
- skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
- skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
- skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
- skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
- skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
- skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
- skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
- skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
- skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
- skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
- skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
- skillanalyzer/core/analyzers/static.py +1105 -0
- skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
- skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
- skillanalyzer/core/exceptions.py +77 -0
- skillanalyzer/core/loader.py +377 -0
- skillanalyzer/core/models.py +300 -0
- skillanalyzer/core/reporters/__init__.py +26 -0
- skillanalyzer/core/reporters/json_reporter.py +65 -0
- skillanalyzer/core/reporters/markdown_reporter.py +209 -0
- skillanalyzer/core/reporters/sarif_reporter.py +246 -0
- skillanalyzer/core/reporters/table_reporter.py +195 -0
- skillanalyzer/core/rules/__init__.py +19 -0
- skillanalyzer/core/rules/patterns.py +165 -0
- skillanalyzer/core/rules/yara_scanner.py +157 -0
- skillanalyzer/core/scanner.py +437 -0
- skillanalyzer/core/static_analysis/__init__.py +27 -0
- skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
- skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
- skillanalyzer/core/static_analysis/context_extractor.py +742 -0
- skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
- skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
- skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
- skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
- skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
- skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
- skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
- skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
- skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
- skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
- skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
- skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
- skillanalyzer/core/static_analysis/types/__init__.py +36 -0
- skillanalyzer/data/__init__.py +30 -0
- skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
- skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
- skillanalyzer/data/prompts/llm_response_schema.json +71 -0
- skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
- skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
- skillanalyzer/data/prompts/unified_response_schema.md +97 -0
- skillanalyzer/data/rules/signatures.yaml +440 -0
- skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
- skillanalyzer/data/yara_rules/code_execution.yara +61 -0
- skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
- skillanalyzer/data/yara_rules/command_injection.yara +54 -0
- skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
- skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
- skillanalyzer/data/yara_rules/script_injection.yara +83 -0
- skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
- skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
- skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
- skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
- skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
- skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
- skillanalyzer/hooks/__init__.py +21 -0
- skillanalyzer/hooks/pre_commit.py +450 -0
- skillanalyzer/threats/__init__.py +25 -0
- skillanalyzer/threats/threats.py +480 -0
- skillanalyzer/utils/__init__.py +28 -0
- skillanalyzer/utils/command_utils.py +129 -0
- skillanalyzer/utils/di_container.py +154 -0
- skillanalyzer/utils/file_utils.py +86 -0
- skillanalyzer/utils/logging_config.py +96 -0
- skillanalyzer/utils/logging_utils.py +71 -0
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Skill package loader and SKILL.md parser.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import re
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import frontmatter
|
|
25
|
+
|
|
26
|
+
from .models import Skill, SkillFile, SkillManifest
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SkillLoadError(Exception):
    """Raised when a skill package cannot be located, read, or parsed."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SkillLoader:
    """Loads and parses Claude Skill, Codex Skill, and Cursor Agent Skill packages.

    Supports the Agent Skills specification format used by Claude Skills,
    OpenAI Codex Skills, and Cursor Agent Skills. Skills are structured as:
    - SKILL.md (required): YAML frontmatter + Markdown instructions
    - scripts/ (optional): Executable code (Python, Bash)
    - references/ (optional): Documentation and data files
    - assets/ (optional): Templates, images, and other resources
    """

    # File type mappings (compared against lowercased suffixes).
    PYTHON_EXTENSIONS = {".py"}
    BASH_EXTENSIONS = {".sh", ".bash"}
    MARKDOWN_EXTENSIONS = {".md", ".markdown"}
    BINARY_EXTENSIONS = {".exe", ".so", ".dylib", ".dll", ".bin"}

    # Module names treated as "standard library" when guessing whether an
    # import refers to a local skill file. Compared as exact names (first
    # dotted component), not prefixes: a prefix check would wrongly skip
    # local modules such as "osutils" or "rex".
    _STDLIB_HINTS_MARKDOWN = frozenset({"os", "sys", "re", "json", "yaml", "typing"})
    _STDLIB_HINTS_PYTHON = frozenset({"os", "sys", "re", "json", "pathlib", "typing", "collections"})

    def __init__(self, max_file_size_mb: int = 10):
        """
        Initialize skill loader.

        Args:
            max_file_size_mb: Maximum file size to read in MB
        """
        self.max_file_size_bytes = max_file_size_mb * 1024 * 1024

    def load_skill(self, skill_directory: Path) -> Skill:
        """
        Load a skill package from a directory.

        Args:
            skill_directory: Path to the skill directory

        Returns:
            Parsed Skill object

        Raises:
            SkillLoadError: If skill cannot be loaded
        """
        if not isinstance(skill_directory, Path):
            skill_directory = Path(skill_directory)

        if not skill_directory.exists():
            raise SkillLoadError(f"Skill directory does not exist: {skill_directory}")

        if not skill_directory.is_dir():
            raise SkillLoadError(f"Path is not a directory: {skill_directory}")

        # SKILL.md is the only required file in an Agent Skills package.
        skill_md_path = skill_directory / "SKILL.md"
        if not skill_md_path.exists():
            raise SkillLoadError(f"SKILL.md not found in {skill_directory}")

        # Parse manifest (YAML frontmatter) and instruction body.
        manifest, instruction_body = self._parse_skill_md(skill_md_path)

        # Discover all files in the skill package.
        files = self._discover_files(skill_directory)

        # Extract files the instructions refer to (links, run/see patterns, etc.).
        referenced_files = self._extract_referenced_files(instruction_body)

        return Skill(
            directory=skill_directory,
            manifest=manifest,
            skill_md_path=skill_md_path,
            instruction_body=instruction_body,
            files=files,
            referenced_files=referenced_files,
        )

    def _parse_skill_md(self, skill_md_path: Path) -> tuple[SkillManifest, str]:
        """
        Parse SKILL.md file with YAML frontmatter.

        Args:
            skill_md_path: Path to SKILL.md

        Returns:
            Tuple of (SkillManifest, instruction_body)

        Raises:
            SkillLoadError: If parsing fails
        """
        try:
            with open(skill_md_path, encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Chain the cause so callers can inspect the underlying I/O error.
            raise SkillLoadError(f"Failed to read SKILL.md: {e}") from e

        # Parse with python-frontmatter
        try:
            post = frontmatter.loads(content)
            metadata = post.metadata
            body = post.content
        except Exception as e:
            raise SkillLoadError(f"Failed to parse YAML frontmatter: {e}") from e

        # Validate required fields (name and description are mandatory in the
        # Agent Skills specification).
        if "name" not in metadata:
            raise SkillLoadError("SKILL.md missing required field: name")
        if "description" not in metadata:
            raise SkillLoadError("SKILL.md missing required field: description")

        # Extract metadata field - if YAML has a 'metadata' key, use it directly
        # Otherwise, collect remaining fields as metadata
        metadata_field = None
        if "metadata" in metadata and isinstance(metadata["metadata"], dict):
            # YAML has explicit metadata key (Codex Skills format)
            metadata_field = metadata["metadata"]
        else:
            # Collect remaining fields as metadata (Claude Skills format)
            # Exclude known fields from being collected as metadata
            known_fields = [
                "name",
                "description",
                "license",
                "compatibility",
                "allowed-tools",
                "allowed_tools",
                "metadata",
                "disable-model-invocation",
                "disable_model_invocation",
            ]
            metadata_field = {k: v for k, v in metadata.items() if k not in known_fields}
            # Only set metadata if there are remaining fields
            if not metadata_field:
                metadata_field = None

        # Extract disable-model-invocation (Cursor Agent Skills format)
        # Supports both kebab-case and snake_case variants
        # Use explicit None check to properly handle `false` values
        disable_model_invocation = metadata.get("disable-model-invocation")
        if disable_model_invocation is None:
            disable_model_invocation = metadata.get("disable_model_invocation", False)

        # Create manifest
        manifest = SkillManifest(
            name=metadata["name"],
            description=metadata["description"],
            license=metadata.get("license"),
            compatibility=metadata.get("compatibility"),
            allowed_tools=metadata.get("allowed-tools") or metadata.get("allowed_tools"),
            metadata=metadata_field,
            disable_model_invocation=bool(disable_model_invocation),
        )

        return manifest, body

    def _discover_files(self, skill_directory: Path) -> list[SkillFile]:
        """
        Discover all files in the skill package.

        Args:
            skill_directory: Path to skill directory

        Returns:
            List of SkillFile objects
        """
        files = []

        for path in skill_directory.rglob("*"):
            if not path.is_file():
                continue

            # Skip hidden files and __pycache__ (relative to the skill directory)
            #
            # Important: Skills may live under hidden parent directories like `.claude/skills/`.
            # We only want to skip hidden files/folders *inside* the skill package, not its parents.
            rel_parts = path.relative_to(skill_directory).parts
            if any(part.startswith(".") for part in rel_parts):
                continue
            if "__pycache__" in rel_parts:
                continue

            relative_path = str(path.relative_to(skill_directory))
            file_type = self._determine_file_type(path)
            size_bytes = path.stat().st_size

            # Read content if not too large and not binary
            content = None
            if size_bytes < self.max_file_size_bytes and file_type != "binary":
                try:
                    with open(path, encoding="utf-8") as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError):
                    # Treat as binary if can't read as text
                    file_type = "binary"

            skill_file = SkillFile(
                path=path,
                relative_path=relative_path,
                file_type=file_type,
                content=content,
                size_bytes=size_bytes,
            )
            files.append(skill_file)

        return files

    def _determine_file_type(self, path: Path) -> str:
        """
        Determine the type of a file based on extension.

        Args:
            path: File path

        Returns:
            File type string: 'python', 'bash', 'markdown', 'binary', or 'other'
        """
        suffix = path.suffix.lower()

        if suffix in self.PYTHON_EXTENSIONS:
            return "python"
        elif suffix in self.BASH_EXTENSIONS:
            return "bash"
        elif suffix in self.MARKDOWN_EXTENSIONS:
            return "markdown"
        elif suffix in self.BINARY_EXTENSIONS:
            return "binary"
        else:
            return "other"

    def _extract_referenced_files(self, instruction_body: str) -> list[str]:
        """
        Extract file references from instruction body.

        Looks for markdown links, common file reference patterns, directives,
        and other ways files might be referenced.

        Args:
            instruction_body: The markdown instruction text

        Returns:
            List of referenced file paths (deduplicated, unordered)
        """
        references = []

        # Match markdown links: [text](file.md)
        markdown_links = re.findall(r"\[([^\]]+)\]\(([^\)]+)\)", instruction_body)
        for _, link in markdown_links:
            # Filter out URLs, keep relative file paths
            if not link.startswith(("http://", "https://", "ftp://", "#")):
                references.append(link)

        # Match "see FILE.md" or "refer to FILE.md" patterns
        # Use backticks or quotes to identify actual file references, avoiding false matches like "the.py"
        see_patterns = re.findall(
            r"(?:see|refer to|check|read)\s+[`'\"]([A-Za-z0-9_\-./]+\.(?:md|py|sh|txt))[`'\"]",
            instruction_body,
            re.IGNORECASE,
        )
        references.extend(see_patterns)

        # Match script execution patterns: scripts/foo.py
        script_patterns = re.findall(
            r"(?:run|execute|invoke)\s+([A-Za-z0-9_\-./]+\.(?:py|sh))", instruction_body, re.IGNORECASE
        )
        references.extend(script_patterns)

        # Match @reference: directives (common in documentation)
        reference_directives = re.findall(r"@reference:\s*([A-Za-z0-9_\-./]+)", instruction_body, re.IGNORECASE)
        references.extend(reference_directives)

        # Match include: statements
        include_patterns = re.findall(
            r"(?:include|import|load):\s*([A-Za-z0-9_\-./]+\.(?:md|py|sh|txt|yaml|json))",
            instruction_body,
            re.IGNORECASE,
        )
        references.extend(include_patterns)

        # Match file paths in code blocks that look like references
        code_file_refs = re.findall(r"(?:from|import)\s+([A-Za-z0-9_]+)\s", instruction_body)
        # Only add if it looks like a local module (exact-name check; a
        # prefix check would wrongly exclude e.g. "osutils").
        for ref in code_file_refs:
            if ref not in self._STDLIB_HINTS_MARKDOWN:
                references.append(f"{ref}.py")

        # Match references/*, assets/*, or templates/* paths. Capture the
        # directory that actually matched so we only record the real path
        # instead of fabricating the same file under all three directories.
        asset_patterns = re.findall(r"(references|assets|templates)/([A-Za-z0-9_\-./]+)", instruction_body)
        for directory, rest in asset_patterns:
            references.append(f"{directory}/{rest}")

        # Return unique references
        return list(set(references))

    def extract_references_from_file(self, file_path: Path, content: str) -> list[str]:
        """
        Extract references from a specific file based on its type.

        Args:
            file_path: Path to the file
            content: File content

        Returns:
            List of referenced file paths (deduplicated, unordered)
        """
        references = []
        suffix = file_path.suffix.lower()

        if suffix in (".md", ".markdown"):
            # Use the standard markdown extraction
            references.extend(self._extract_referenced_files(content))

        elif suffix == ".py":
            # Extract Python imports that might be local modules
            import_patterns = re.findall(r"^from\s+([A-Za-z0-9_.]+)\s+import", content, re.MULTILINE)
            relative_imports = re.findall(r"^from\s+\.([A-Za-z0-9_.]*)\s+import", content, re.MULTILINE)

            for imp in import_patterns:
                # Compare the top-level package name exactly; prefix matching
                # would both skip local modules that merely start with a
                # stdlib name and mishandle dotted imports.
                parts = imp.split(".")
                if parts[0] not in self._STDLIB_HINTS_PYTHON:
                    references.append(f"{parts[0]}.py")

            for imp in relative_imports:
                if imp:
                    references.append(f"{imp}.py")

        elif suffix in (".sh", ".bash"):
            # Extract source commands
            source_patterns = re.findall(r"(?:source|\.)\s+([A-Za-z0-9_\-./]+\.(?:sh|bash))", content)
            references.extend(source_patterns)

        return list(set(references))
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def load_skill(skill_directory: Path, max_file_size_mb: int = 10) -> Skill:
    """
    Convenience function to load a skill package.

    Creates a one-shot :class:`SkillLoader` and delegates to it.

    Args:
        skill_directory: Path to skill directory
        max_file_size_mb: Maximum file size to read

    Returns:
        Loaded Skill object
    """
    return SkillLoader(max_file_size_mb=max_file_size_mb).load_skill(skill_directory)
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Data models for Claude Skills and security findings.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from datetime import datetime
|
|
23
|
+
from enum import Enum
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Severity(str, Enum):
    """Severity levels for security findings.

    Members are listed from most to least severe; SAFE marks a clean result
    (e.g. ScanResult.max_severity returns SAFE when there are no findings).
    Inherits from str so members compare and serialize as plain strings.
    """

    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    INFO = "INFO"
    SAFE = "SAFE"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ThreatCategory(str, Enum):
    """Categories of security threats.

    Values are lowercase snake_case identifiers; Finding.to_dict() emits
    these string values in serialized reports. Inherits from str so members
    compare and serialize as plain strings.
    """

    PROMPT_INJECTION = "prompt_injection"
    COMMAND_INJECTION = "command_injection"
    DATA_EXFILTRATION = "data_exfiltration"
    UNAUTHORIZED_TOOL_USE = "unauthorized_tool_use"
    OBFUSCATION = "obfuscation"
    HARDCODED_SECRETS = "hardcoded_secrets"
    SOCIAL_ENGINEERING = "social_engineering"
    RESOURCE_ABUSE = "resource_abuse"
    POLICY_VIOLATION = "policy_violation"
    MALWARE = "malware"
    # Agent-skill-specific categories; each has a matching rule file under
    # data/yara_rules/ (e.g. autonomy_abuse.yara).
    SKILL_DISCOVERY_ABUSE = "skill_discovery_abuse"
    TRANSITIVE_TRUST_ABUSE = "transitive_trust_abuse"
    AUTONOMY_ABUSE = "autonomy_abuse"
    TOOL_CHAINING_ABUSE = "tool_chaining_abuse"
    UNICODE_STEGANOGRAPHY = "unicode_steganography"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class SkillManifest:
    """Parsed YAML frontmatter from SKILL.md.

    Covers the Agent Skills specification as implemented by Claude Skills,
    Codex Skills, and Cursor Agent Skills:
    - Required: name, description
    - Optional: license, compatibility, allowed-tools, metadata
    - Cursor Skills: disable-model-invocation (controls automatic invocation)
    - Codex Skills: metadata.short-description (optional user-facing description)
    """

    name: str
    description: str
    license: str | None = None
    compatibility: str | None = None
    allowed_tools: list[str] | None = None
    metadata: dict[str, Any] | None = None
    disable_model_invocation: bool = False

    def __post_init__(self):
        """Coerce allowed_tools into a list of tool names."""
        tools = self.allowed_tools
        if tools is None:
            self.allowed_tools = []
        elif isinstance(tools, str):
            # Frontmatter frequently writes the tool list as one
            # comma-separated string (e.g. "allowed-tools: Read, Grep, Glob");
            # split it and discard empty entries.
            stripped = (piece.strip() for piece in tools.split(","))
            self.allowed_tools = [name for name in stripped if name]

    @property
    def short_description(self) -> str | None:
        """The metadata 'short-description' entry (Codex Skills), if present."""
        meta = self.metadata
        if isinstance(meta, dict):
            return meta.get("short-description")
        return None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclass
class SkillFile:
    """A single file inside a skill package."""

    path: Path
    relative_path: str
    file_type: str  # 'markdown', 'python', 'bash', 'binary', 'other'
    content: str | None = None
    size_bytes: int = 0

    def read_content(self) -> str:
        """Return the file's text, lazily loading from disk on first use.

        Unreadable (binary or undecodable) files are remembered as empty
        text; the return value is always a string, never None.
        """
        if self.content is None and self.path.exists():
            try:
                self.content = self.path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                self.content = ""  # Binary or unreadable file
        return self.content or ""
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@dataclass
class Skill:
    """A complete Claude Skill, Codex Skill, or Cursor Agent Skill package.

    Follows the Agent Skills specification layout:
    - SKILL.md (required): Manifest and instructions
    - scripts/ (optional): Executable code
    - references/ (optional): Documentation files
    - assets/ (optional): Templates and resources
    """

    directory: Path
    manifest: SkillManifest
    skill_md_path: Path
    instruction_body: str
    files: list[SkillFile] = field(default_factory=list)
    referenced_files: list[str] = field(default_factory=list)

    @property
    def name(self) -> str:
        """Skill name, taken from the manifest."""
        return self.manifest.name

    @property
    def description(self) -> str:
        """Skill description, taken from the manifest."""
        return self.manifest.description

    def get_scripts(self) -> list[SkillFile]:
        """Return every executable script file (Python and Bash)."""
        script_types = ("python", "bash")
        return [item for item in self.files if item.file_type in script_types]

    def get_markdown_files(self) -> list[SkillFile]:
        """Return every markdown file in the package."""
        return [item for item in self.files if item.file_type == "markdown"]
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@dataclass
class Finding:
    """A security issue discovered in a skill."""

    id: str  # unique finding identifier (e.g., rule ID + line number)
    rule_id: str  # rule that triggered this finding
    category: ThreatCategory
    severity: Severity
    title: str
    description: str
    file_path: str | None = None
    line_number: int | None = None
    snippet: str | None = None
    remediation: str | None = None
    analyzer: str | None = None  # analyzer that produced this finding (e.g., "static", "llm", "behavioral")
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the finding to a plain dict (enums become their string values)."""
        keys = (
            "id", "rule_id", "category", "severity", "title", "description",
            "file_path", "line_number", "snippet", "remediation", "analyzer", "metadata",
        )
        values = (
            self.id, self.rule_id, self.category.value, self.severity.value,
            self.title, self.description, self.file_path, self.line_number,
            self.snippet, self.remediation, self.analyzer, self.metadata,
        )
        return dict(zip(keys, values))
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@dataclass
class ScanResult:
    """Results from scanning a single skill."""

    skill_name: str
    skill_directory: str
    findings: list[Finding] = field(default_factory=list)
    scan_duration_seconds: float = 0.0
    analyzers_used: list[str] = field(default_factory=list)
    timestamp: datetime = field(default_factory=datetime.now)

    @property
    def is_safe(self) -> bool:
        """True when no CRITICAL or HIGH finding is present."""
        blocking = (Severity.CRITICAL, Severity.HIGH)
        return all(item.severity not in blocking for item in self.findings)

    @property
    def max_severity(self) -> Severity:
        """The most severe level among all findings; SAFE when none apply."""
        present = {item.severity for item in self.findings}
        ranked = (Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO)
        for level in ranked:
            if level in present:
                return level
        return Severity.SAFE

    def get_findings_by_severity(self, severity: Severity) -> list[Finding]:
        """All findings whose severity equals the given level."""
        return [item for item in self.findings if item.severity == severity]

    def get_findings_by_category(self, category: ThreatCategory) -> list[Finding]:
        """All findings belonging to the given threat category."""
        return [item for item in self.findings if item.category == category]

    def to_dict(self) -> dict[str, Any]:
        """Convert scan result to dictionary.

        Output format is compatible with mcp-scanner-plugin's SkillResultParser.
        See: https://github.com/cisco/mcp-scanner-plugin
        """
        serialized_findings = [item.to_dict() for item in self.findings]
        return {
            "skill_name": self.skill_name,
            "skill_path": self.skill_directory,  # Plugin expects skill_path
            "skill_directory": self.skill_directory,  # Keep for backward compatibility
            "is_safe": self.is_safe,
            "max_severity": self.max_severity.value,
            "findings_count": len(serialized_findings),
            "findings": serialized_findings,
            "scan_duration_seconds": self.scan_duration_seconds,
            "duration_ms": int(self.scan_duration_seconds * 1000),  # Plugin expects duration_ms
            "analyzers_used": self.analyzers_used,
            "timestamp": self.timestamp.isoformat(),
        }
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
@dataclass
class Report:
    """Aggregated report from scanning one or more skills."""

    scan_results: list[ScanResult] = field(default_factory=list)
    total_skills_scanned: int = 0
    total_findings: int = 0
    critical_count: int = 0
    high_count: int = 0
    medium_count: int = 0
    low_count: int = 0
    info_count: int = 0
    safe_count: int = 0
    timestamp: datetime = field(default_factory=datetime.now)

    def add_scan_result(self, result: ScanResult):
        """Record one scan result and update every aggregate counter."""
        self.scan_results.append(result)
        self.total_skills_scanned += 1
        self.total_findings += len(result.findings)

        # Per-severity counters via a dispatch table; SAFE findings are not
        # tallied, matching the severity attributes this report exposes.
        counter_attr = {
            Severity.CRITICAL: "critical_count",
            Severity.HIGH: "high_count",
            Severity.MEDIUM: "medium_count",
            Severity.LOW: "low_count",
            Severity.INFO: "info_count",
        }
        for finding in result.findings:
            attr = counter_attr.get(finding.severity)
            if attr is not None:
                setattr(self, attr, getattr(self, attr) + 1)

        if result.is_safe:
            self.safe_count += 1

    def to_dict(self) -> dict[str, Any]:
        """Convert report to dictionary."""
        summary = {
            "total_skills_scanned": self.total_skills_scanned,
            "total_findings": self.total_findings,
            "safe_skills": self.safe_count,
            "findings_by_severity": {
                "critical": self.critical_count,
                "high": self.high_count,
                "medium": self.medium_count,
                "low": self.low_count,
                "info": self.info_count,
            },
            "timestamp": self.timestamp.isoformat(),
        }
        return {
            "summary": summary,
            "results": [entry.to_dict() for entry in self.scan_results],
        }
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Report generation modules for different output formats.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from .json_reporter import JSONReporter
|
|
22
|
+
from .markdown_reporter import MarkdownReporter
|
|
23
|
+
from .sarif_reporter import SARIFReporter
|
|
24
|
+
from .table_reporter import TableReporter
|
|
25
|
+
|
|
26
|
+
# Public reporter classes re-exported at the package level.
__all__ = ["JSONReporter", "MarkdownReporter", "TableReporter", "SARIFReporter"]
|