cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,377 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Skill package loader and SKILL.md parser.
19
+ """
20
+
21
+ import re
22
+ from pathlib import Path
23
+
24
+ import frontmatter
25
+
26
+ from .models import Skill, SkillFile, SkillManifest
27
+
28
+
29
class SkillLoadError(Exception):
    """Signals that a skill package could not be loaded.

    Raised by ``SkillLoader`` when the skill directory or its SKILL.md is
    missing, unreadable, unparsable, or lacks a required frontmatter field.
    """
33
+
34
+
35
class SkillLoader:
    """Loads and parses Claude Skill, Codex Skill, and Cursor Agent Skill packages.

    Supports the Agent Skills specification format used by Claude Skills,
    OpenAI Codex Skills, and Cursor Agent Skills. Skills are structured as:
        - SKILL.md (required): YAML frontmatter + Markdown instructions
        - scripts/ (optional): Executable code (Python, Bash)
        - references/ (optional): Documentation and data files
        - assets/ (optional): Templates, images, and other resources
    """

    # File type mappings (compared against the lower-cased file suffix)
    PYTHON_EXTENSIONS = {".py"}
    BASH_EXTENSIONS = {".sh", ".bash"}
    MARKDOWN_EXTENSIONS = {".md", ".markdown"}
    BINARY_EXTENSIONS = {".exe", ".so", ".dylib", ".dll", ".bin"}

    # Frontmatter keys that map onto dedicated SkillManifest fields; they are
    # never folded into the free-form ``metadata`` dict.
    _KNOWN_FRONTMATTER_FIELDS = frozenset(
        {
            "name",
            "description",
            "license",
            "compatibility",
            "allowed-tools",
            "allowed_tools",
            "metadata",
            "disable-model-invocation",
            "disable_model_invocation",
        }
    )

    # Top-level module names that are never reported as local file references.
    # Matched exactly: a prefix match would wrongly hide local modules such as
    # ``report_utils`` ("re...") or ``system_check`` ("sys...").
    _NON_LOCAL_MODULES = frozenset({"os", "sys", "re", "json", "yaml", "pathlib", "typing", "collections"})

    def __init__(self, max_file_size_mb: int = 10):
        """
        Initialize skill loader.

        Args:
            max_file_size_mb: Maximum file size to read in MB
        """
        self.max_file_size_bytes = max_file_size_mb * 1024 * 1024

    def load_skill(self, skill_directory: Path) -> Skill:
        """
        Load a skill package from a directory.

        Args:
            skill_directory: Path to the skill directory (a plain string is
                also accepted and converted to ``Path``)

        Returns:
            Parsed Skill object

        Raises:
            SkillLoadError: If the directory or SKILL.md is missing, or
                SKILL.md cannot be read or parsed
        """
        if not isinstance(skill_directory, Path):
            skill_directory = Path(skill_directory)

        if not skill_directory.exists():
            raise SkillLoadError(f"Skill directory does not exist: {skill_directory}")

        if not skill_directory.is_dir():
            raise SkillLoadError(f"Path is not a directory: {skill_directory}")

        # Find SKILL.md
        skill_md_path = skill_directory / "SKILL.md"
        if not skill_md_path.exists():
            raise SkillLoadError(f"SKILL.md not found in {skill_directory}")

        # Parse SKILL.md
        manifest, instruction_body = self._parse_skill_md(skill_md_path)

        # Discover all files in the skill package
        files = self._discover_files(skill_directory)

        # Extract referenced files from instruction body
        referenced_files = self._extract_referenced_files(instruction_body)

        return Skill(
            directory=skill_directory,
            manifest=manifest,
            skill_md_path=skill_md_path,
            instruction_body=instruction_body,
            files=files,
            referenced_files=referenced_files,
        )

    def _parse_skill_md(self, skill_md_path: Path) -> tuple[SkillManifest, str]:
        """
        Parse SKILL.md file with YAML frontmatter.

        Args:
            skill_md_path: Path to SKILL.md

        Returns:
            Tuple of (SkillManifest, instruction_body)

        Raises:
            SkillLoadError: If the file cannot be read, the frontmatter cannot
                be parsed, or a required field (name, description) is missing.
                The underlying exception is attached as ``__cause__``.
        """
        try:
            with open(skill_md_path, encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            raise SkillLoadError(f"Failed to read SKILL.md: {e}") from e

        # Parse with python-frontmatter. The broad catch is deliberate: any
        # parser failure is reported uniformly as a SkillLoadError.
        try:
            post = frontmatter.loads(content)
            metadata = post.metadata
            body = post.content
        except Exception as e:
            raise SkillLoadError(f"Failed to parse YAML frontmatter: {e}") from e

        # Validate required fields
        for required in ("name", "description"):
            if required not in metadata:
                raise SkillLoadError(f"SKILL.md missing required field: {required}")

        # Extract metadata field - if YAML has a 'metadata' key, use it directly
        # Otherwise, collect remaining fields as metadata
        explicit_metadata = metadata.get("metadata")
        if isinstance(explicit_metadata, dict):
            # YAML has explicit metadata key (Codex Skills format)
            metadata_field = explicit_metadata
        else:
            # Collect remaining fields as metadata (Claude Skills format),
            # leaving it as None when nothing is left over.
            metadata_field = {
                k: v for k, v in metadata.items() if k not in self._KNOWN_FRONTMATTER_FIELDS
            } or None

        # Extract disable-model-invocation (Cursor Agent Skills format)
        # Supports both kebab-case and snake_case variants
        # Use explicit None check to properly handle `false` values
        disable_model_invocation = metadata.get("disable-model-invocation")
        if disable_model_invocation is None:
            disable_model_invocation = metadata.get("disable_model_invocation", False)

        manifest = SkillManifest(
            name=metadata["name"],
            description=metadata["description"],
            license=metadata.get("license"),
            compatibility=metadata.get("compatibility"),
            allowed_tools=metadata.get("allowed-tools") or metadata.get("allowed_tools"),
            metadata=metadata_field,
            disable_model_invocation=bool(disable_model_invocation),
        )

        return manifest, body

    def _discover_files(self, skill_directory: Path) -> list[SkillFile]:
        """
        Discover all files in the skill package.

        Hidden files/directories and ``__pycache__`` inside the package are
        skipped. Text content is loaded only for files below the size limit
        whose extension is not a known binary type; a file that cannot be read
        as UTF-8 text is reclassified as ``"binary"`` with no content.

        Args:
            skill_directory: Path to skill directory

        Returns:
            List of SkillFile objects, ordered by path
        """
        files = []

        # Sorted so the result does not depend on filesystem enumeration order.
        for path in sorted(skill_directory.rglob("*")):
            if not path.is_file():
                continue

            # Skip hidden files and __pycache__ (relative to the skill directory)
            #
            # Important: Skills may live under hidden parent directories like `.claude/skills/`.
            # We only want to skip hidden files/folders *inside* the skill package, not its parents.
            relative = path.relative_to(skill_directory)
            rel_parts = relative.parts
            if any(part.startswith(".") for part in rel_parts):
                continue
            if "__pycache__" in rel_parts:
                continue

            file_type = self._determine_file_type(path)
            size_bytes = path.stat().st_size

            # Read content if not too large and not binary
            content = None
            if size_bytes < self.max_file_size_bytes and file_type != "binary":
                try:
                    with open(path, encoding="utf-8") as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError):
                    # Treat as binary if can't read as text
                    file_type = "binary"

            files.append(
                SkillFile(
                    path=path,
                    relative_path=str(relative),
                    file_type=file_type,
                    content=content,
                    size_bytes=size_bytes,
                )
            )

        return files

    def _determine_file_type(self, path: Path) -> str:
        """
        Determine the type of a file based on extension.

        Args:
            path: File path

        Returns:
            One of "python", "bash", "markdown", "binary", or "other"
        """
        suffix = path.suffix.lower()

        type_table = (
            (self.PYTHON_EXTENSIONS, "python"),
            (self.BASH_EXTENSIONS, "bash"),
            (self.MARKDOWN_EXTENSIONS, "markdown"),
            (self.BINARY_EXTENSIONS, "binary"),
        )
        for extensions, label in type_table:
            if suffix in extensions:
                return label
        return "other"

    def _extract_referenced_files(self, instruction_body: str) -> list[str]:
        """
        Extract file references from instruction body.

        Looks for markdown links, common file reference patterns, directives,
        and other ways files might be referenced.

        Args:
            instruction_body: The markdown instruction text

        Returns:
            List of unique referenced file paths, in order of first appearance
        """
        references: list[str] = []

        # Match markdown links: [text](file.md)
        for _, link in re.findall(r"\[([^\]]+)\]\(([^\)]+)\)", instruction_body):
            # Filter out URLs, keep relative file paths
            if not link.startswith(("http://", "https://", "ftp://", "#")):
                references.append(link)

        # Match "see FILE.md" or "refer to FILE.md" patterns
        # Use backticks or quotes to identify actual file references, avoiding false matches like "the.py"
        references.extend(
            re.findall(
                r"(?:see|refer to|check|read)\s+[`'\"]([A-Za-z0-9_\-./]+\.(?:md|py|sh|txt))[`'\"]",
                instruction_body,
                re.IGNORECASE,
            )
        )

        # Match script execution patterns: scripts/foo.py
        references.extend(
            re.findall(r"(?:run|execute|invoke)\s+([A-Za-z0-9_\-./]+\.(?:py|sh))", instruction_body, re.IGNORECASE)
        )

        # Match @reference: directives (common in documentation)
        references.extend(re.findall(r"@reference:\s*([A-Za-z0-9_\-./]+)", instruction_body, re.IGNORECASE))

        # Match include: statements
        references.extend(
            re.findall(
                r"(?:include|import|load):\s*([A-Za-z0-9_\-./]+\.(?:md|py|sh|txt|yaml|json))",
                instruction_body,
                re.IGNORECASE,
            )
        )

        # Match import statements in code blocks; only keep names that are not
        # in the exclusion list (exact match, so local modules like
        # ``report_utils`` are not dropped just because they start with "re").
        for module in re.findall(r"(?:from|import)\s+([A-Za-z0-9_]+)\s", instruction_body):
            if module not in self._NON_LOCAL_MODULES:
                references.append(f"{module}.py")

        # Match references/*, assets/* or templates/* paths, keeping the
        # directory that was actually written rather than guessing all three.
        references.extend(re.findall(r"((?:references|assets|templates)/[A-Za-z0-9_\-./]+)", instruction_body))

        # De-duplicate while preserving first-seen order for stable output
        return list(dict.fromkeys(references))

    def extract_references_from_file(self, file_path: Path, content: str) -> list[str]:
        """
        Extract references from a specific file based on its type.

        Markdown files use the same extraction as the SKILL.md body. Python
        files contribute ``from ... import`` targets (absolute imports map to
        ``<top-level>.py``; relative imports map to a relative path such as
        ``pkg/mod.py`` or ``../mod.py``). Shell files contribute sourced
        scripts. Other suffixes yield no references.

        Args:
            file_path: Path to the file (only its suffix is inspected)
            content: File content

        Returns:
            List of unique referenced file paths, in order of first appearance
        """
        references: list[str] = []
        suffix = file_path.suffix.lower()

        if suffix in (".md", ".markdown"):
            # Use the standard markdown extraction
            references.extend(self._extract_referenced_files(content))

        elif suffix == ".py":
            # Absolute imports: the module must start with an identifier
            # character so relative imports ("from .x import") are not
            # mis-read here as a module with an empty top-level name.
            absolute_imports = re.findall(r"^from\s+([A-Za-z_][A-Za-z0-9_.]*)\s+import", content, re.MULTILINE)
            for module in absolute_imports:
                top_level = module.split(".")[0]
                # Only include if it looks like a local module
                if top_level not in self._NON_LOCAL_MODULES:
                    references.append(f"{top_level}.py")

            # Relative imports: leading dots select the package level, the
            # remainder is a dotted module path that maps onto directories.
            relative_imports = re.findall(r"^from\s+(\.+)([A-Za-z0-9_.]*)\s+import", content, re.MULTILINE)
            for dots, module in relative_imports:
                if not module:
                    # "from . import name": no module path to resolve
                    continue
                parent_hops = "../" * (len(dots) - 1)
                references.append(f"{parent_hops}{module.replace('.', '/')}.py")

        elif suffix in (".sh", ".bash"):
            # Extract source commands
            references.extend(re.findall(r"(?:source|\.)\s+([A-Za-z0-9_\-./]+\.(?:sh|bash))", content))

        # De-duplicate while preserving first-seen order for stable output
        return list(dict.fromkeys(references))
363
+
364
+
365
def load_skill(skill_directory: Path, max_file_size_mb: int = 10) -> Skill:
    """
    Load a skill package in a single call.

    Builds a one-off ``SkillLoader`` with the given size limit and delegates
    to its ``load_skill`` method.

    Args:
        skill_directory: Path to skill directory
        max_file_size_mb: Maximum file size to read, in MB

    Returns:
        Loaded Skill object
    """
    return SkillLoader(max_file_size_mb=max_file_size_mb).load_skill(skill_directory)
@@ -0,0 +1,300 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Data models for Claude Skills and security findings.
19
+ """
20
+
21
+ from dataclasses import dataclass, field
22
+ from datetime import datetime
23
+ from enum import Enum
24
+ from pathlib import Path
25
+ from typing import Any
26
+
27
+
28
class Severity(str, Enum):
    """Severity level attached to a security finding.

    Members are also ``str`` instances, so they compare equal to their plain
    string value.
    """

    # Listed from most to least severe.
    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    INFO = "INFO"
    # Returned by ScanResult.max_severity when a scan has no findings.
    SAFE = "SAFE"
37
+
38
+
39
class ThreatCategory(str, Enum):
    """Category of security threat a finding belongs to.

    Members are also ``str`` instances; each value is the lower-case
    snake_case form of the member name.
    """

    PROMPT_INJECTION = "prompt_injection"
    COMMAND_INJECTION = "command_injection"
    DATA_EXFILTRATION = "data_exfiltration"
    UNAUTHORIZED_TOOL_USE = "unauthorized_tool_use"
    OBFUSCATION = "obfuscation"
    HARDCODED_SECRETS = "hardcoded_secrets"
    SOCIAL_ENGINEERING = "social_engineering"
    RESOURCE_ABUSE = "resource_abuse"
    POLICY_VIOLATION = "policy_violation"
    MALWARE = "malware"
    # New threat categories
    SKILL_DISCOVERY_ABUSE = "skill_discovery_abuse"
    TRANSITIVE_TRUST_ABUSE = "transitive_trust_abuse"
    AUTONOMY_ABUSE = "autonomy_abuse"
    TOOL_CHAINING_ABUSE = "tool_chaining_abuse"
    UNICODE_STEGANOGRAPHY = "unicode_steganography"
58
+
59
+
60
+ @dataclass
61
+ class SkillManifest:
62
+ """Parsed YAML frontmatter from SKILL.md.
63
+
64
+ Supports Claude Skills, Codex Skills, and Cursor Agent Skills formats,
65
+ which follow the Agent Skills specification. The format includes:
66
+ - Required: name, description
67
+ - Optional: license, compatibility, allowed-tools, metadata
68
+ - Cursor Skills: disable-model-invocation (controls automatic invocation)
69
+ - Codex Skills: metadata.short-description (optional user-facing description)
70
+ """
71
+
72
+ name: str
73
+ description: str
74
+ license: str | None = None
75
+ compatibility: str | None = None
76
+ allowed_tools: list[str] | None = None
77
+ metadata: dict[str, Any] | None = None
78
+ disable_model_invocation: bool = False
79
+
80
+ def __post_init__(self):
81
+ """Normalize allowed_tools to list."""
82
+ if self.allowed_tools is None:
83
+ self.allowed_tools = []
84
+ elif isinstance(self.allowed_tools, str):
85
+ # Claude Code/Codex docs commonly show comma-separated tool lists in YAML frontmatter
86
+ # (e.g., "allowed-tools: Read, Grep, Glob"). Treat this as a list.
87
+ parts = [p.strip() for p in self.allowed_tools.split(",")]
88
+ self.allowed_tools = [p for p in parts if p]
89
+
90
+ @property
91
+ def short_description(self) -> str | None:
92
+ """Get short-description from metadata (Codex Skills format)."""
93
+ if self.metadata and isinstance(self.metadata, dict):
94
+ return self.metadata.get("short-description")
95
+ return None
96
+
97
+
98
+ @dataclass
99
+ class SkillFile:
100
+ """A file within a skill package."""
101
+
102
+ path: Path
103
+ relative_path: str
104
+ file_type: str # 'markdown', 'python', 'bash', 'binary', 'other'
105
+ content: str | None = None
106
+ size_bytes: int = 0
107
+
108
+ def read_content(self) -> str:
109
+ """Read file content if not already loaded."""
110
+ if self.content is None and self.path.exists():
111
+ try:
112
+ with open(self.path, encoding="utf-8") as f:
113
+ self.content = f.read()
114
+ except (OSError, UnicodeDecodeError):
115
+ self.content = "" # Binary or unreadable file
116
+ return self.content or ""
117
+
118
+
119
@dataclass
class Skill:
    """A complete Claude Skill, Codex Skill, or Cursor Agent Skill package.

    Follows the Agent Skills specification format shared by Claude Skills,
    OpenAI Codex Skills, and Cursor Agent Skills. A package consists of:
        - SKILL.md (required): Manifest and instructions
        - scripts/ (optional): Executable code
        - references/ (optional): Documentation files
        - assets/ (optional): Templates and resources
    """

    directory: Path
    manifest: SkillManifest
    skill_md_path: Path
    instruction_body: str
    files: list[SkillFile] = field(default_factory=list)
    referenced_files: list[str] = field(default_factory=list)

    @property
    def name(self) -> str:
        """Skill name, taken from the manifest."""
        manifest = self.manifest
        return manifest.name

    @property
    def description(self) -> str:
        """Skill description, taken from the manifest."""
        manifest = self.manifest
        return manifest.description

    def get_scripts(self) -> list[SkillFile]:
        """Return the files whose type is "python" or "bash"."""
        scripts = []
        for entry in self.files:
            if entry.file_type in ("python", "bash"):
                scripts.append(entry)
        return scripts

    def get_markdown_files(self) -> list[SkillFile]:
        """Return the files whose type is "markdown"."""
        return list(filter(lambda entry: entry.file_type == "markdown", self.files))
153
+
154
+
155
@dataclass
class Finding:
    """A single security issue detected in a skill."""

    id: str  # Unique finding identifier (e.g., rule ID + line number)
    rule_id: str  # Rule that triggered this finding
    category: ThreatCategory
    severity: Severity
    title: str
    description: str
    file_path: str | None = None
    line_number: int | None = None
    snippet: str | None = None
    remediation: str | None = None
    analyzer: str | None = None  # Which analyzer produced this finding (e.g., "static", "llm", "behavioral")
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the finding to a plain dict.

        ``category`` and ``severity`` are emitted as their enum ``.value``;
        every other field is copied as-is.
        """
        serialized: dict[str, Any] = {
            "id": self.id,
            "rule_id": self.rule_id,
            "category": self.category.value,
            "severity": self.severity.value,
        }
        # The remaining fields are passed through unchanged, in declaration order.
        passthrough = (
            "title",
            "description",
            "file_path",
            "line_number",
            "snippet",
            "remediation",
            "analyzer",
            "metadata",
        )
        serialized.update((key, getattr(self, key)) for key in passthrough)
        return serialized
188
+
189
+
190
@dataclass
class ScanResult:
    """Outcome of scanning one skill."""

    skill_name: str
    skill_directory: str
    findings: list[Finding] = field(default_factory=list)
    scan_duration_seconds: float = 0.0
    analyzers_used: list[str] = field(default_factory=list)
    timestamp: datetime = field(default_factory=datetime.now)

    @property
    def is_safe(self) -> bool:
        """True when no finding is CRITICAL or HIGH severity."""
        for finding in self.findings:
            if finding.severity in (Severity.CRITICAL, Severity.HIGH):
                return False
        return True

    @property
    def max_severity(self) -> Severity:
        """Most severe level present among the findings (SAFE when there are none)."""
        if not self.findings:
            return Severity.SAFE

        # Ordered from most to least severe; the first level present wins.
        ranking = (Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO)
        return next(
            (level for level in ranking if any(f.severity == level for f in self.findings)),
            Severity.SAFE,
        )

    def get_findings_by_severity(self, severity: Severity) -> list[Finding]:
        """Return the findings whose severity equals ``severity``."""
        matches = []
        for finding in self.findings:
            if finding.severity == severity:
                matches.append(finding)
        return matches

    def get_findings_by_category(self, category: ThreatCategory) -> list[Finding]:
        """Return the findings whose category equals ``category``."""
        matches = []
        for finding in self.findings:
            if finding.category == category:
                matches.append(finding)
        return matches

    def to_dict(self) -> dict[str, Any]:
        """Serialize the scan result to a plain dict.

        Output format is compatible with mcp-scanner-plugin's SkillResultParser.
        See: https://github.com/cisco/mcp-scanner-plugin
        """
        location = self.skill_directory
        seconds = self.scan_duration_seconds
        return {
            "skill_name": self.skill_name,
            "skill_path": location,  # Plugin expects skill_path
            "skill_directory": location,  # Keep for backward compatibility
            "is_safe": self.is_safe,
            "max_severity": self.max_severity.value,
            "findings_count": len(self.findings),
            "findings": [finding.to_dict() for finding in self.findings],
            "scan_duration_seconds": seconds,
            "duration_ms": int(seconds * 1000),  # Plugin expects duration_ms
            "analyzers_used": self.analyzers_used,
            "timestamp": self.timestamp.isoformat(),
        }
245
+
246
+
247
@dataclass
class Report:
    """Roll-up of the results from scanning one or more skills."""

    scan_results: list[ScanResult] = field(default_factory=list)
    total_skills_scanned: int = 0
    total_findings: int = 0
    critical_count: int = 0
    high_count: int = 0
    medium_count: int = 0
    low_count: int = 0
    info_count: int = 0
    safe_count: int = 0
    timestamp: datetime = field(default_factory=datetime.now)

    def add_scan_result(self, result: ScanResult):
        """Record ``result`` and bump the totals and per-severity counters."""
        self.scan_results.append(result)
        self.total_skills_scanned += 1
        self.total_findings += len(result.findings)

        # Severity level -> name of the counter attribute it increments.
        # Findings with any other severity are not tallied per level.
        tallies = (
            (Severity.CRITICAL, "critical_count"),
            (Severity.HIGH, "high_count"),
            (Severity.MEDIUM, "medium_count"),
            (Severity.LOW, "low_count"),
            (Severity.INFO, "info_count"),
        )
        for finding in result.findings:
            for level, counter in tallies:
                if finding.severity == level:
                    setattr(self, counter, getattr(self, counter) + 1)
                    break

        if result.is_safe:
            self.safe_count += 1

    def to_dict(self) -> dict[str, Any]:
        """Serialize the report as a ``summary`` section plus per-skill ``results``."""
        by_severity = {
            "critical": self.critical_count,
            "high": self.high_count,
            "medium": self.medium_count,
            "low": self.low_count,
            "info": self.info_count,
        }
        summary = {
            "total_skills_scanned": self.total_skills_scanned,
            "total_findings": self.total_findings,
            "safe_skills": self.safe_count,
            "findings_by_severity": by_severity,
            "timestamp": self.timestamp.isoformat(),
        }
        return {
            "summary": summary,
            "results": [entry.to_dict() for entry in self.scan_results],
        }
@@ -0,0 +1,26 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Report generation modules for different output formats.
19
+ """
20
+
21
+ from .json_reporter import JSONReporter
22
+ from .markdown_reporter import MarkdownReporter
23
+ from .sarif_reporter import SARIFReporter
24
+ from .table_reporter import TableReporter
25
+
26
+ __all__ = ["JSONReporter", "MarkdownReporter", "TableReporter", "SARIFReporter"]