PyPI - cisco-ai-skill-scanner - Versions diffs - 1.0.0__py3-none-any.whl - Mend

cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (100) hide show

cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
skillanalyzer/__init__.py +45 -0
skillanalyzer/_version.py +34 -0
skillanalyzer/api/__init__.py +25 -0
skillanalyzer/api/api.py +34 -0
skillanalyzer/api/api_cli.py +78 -0
skillanalyzer/api/api_server.py +634 -0
skillanalyzer/api/router.py +527 -0
skillanalyzer/cli/__init__.py +25 -0
skillanalyzer/cli/cli.py +816 -0
skillanalyzer/config/__init__.py +26 -0
skillanalyzer/config/config.py +149 -0
skillanalyzer/config/config_parser.py +122 -0
skillanalyzer/config/constants.py +85 -0
skillanalyzer/core/__init__.py +24 -0
skillanalyzer/core/analyzers/__init__.py +75 -0
skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
skillanalyzer/core/analyzers/base.py +53 -0
skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
skillanalyzer/core/analyzers/static.py +1105 -0
skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
skillanalyzer/core/exceptions.py +77 -0
skillanalyzer/core/loader.py +377 -0
skillanalyzer/core/models.py +300 -0
skillanalyzer/core/reporters/__init__.py +26 -0
skillanalyzer/core/reporters/json_reporter.py +65 -0
skillanalyzer/core/reporters/markdown_reporter.py +209 -0
skillanalyzer/core/reporters/sarif_reporter.py +246 -0
skillanalyzer/core/reporters/table_reporter.py +195 -0
skillanalyzer/core/rules/__init__.py +19 -0
skillanalyzer/core/rules/patterns.py +165 -0
skillanalyzer/core/rules/yara_scanner.py +157 -0
skillanalyzer/core/scanner.py +437 -0
skillanalyzer/core/static_analysis/__init__.py +27 -0
skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
skillanalyzer/core/static_analysis/context_extractor.py +742 -0
skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
skillanalyzer/core/static_analysis/types/__init__.py +36 -0
skillanalyzer/data/__init__.py +30 -0
skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
skillanalyzer/data/prompts/llm_response_schema.json +71 -0
skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
skillanalyzer/data/prompts/unified_response_schema.md +97 -0
skillanalyzer/data/rules/signatures.yaml +440 -0
skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
skillanalyzer/data/yara_rules/code_execution.yara +61 -0
skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
skillanalyzer/data/yara_rules/command_injection.yara +54 -0
skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
skillanalyzer/data/yara_rules/script_injection.yara +83 -0
skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
skillanalyzer/hooks/__init__.py +21 -0
skillanalyzer/hooks/pre_commit.py +450 -0
skillanalyzer/threats/__init__.py +25 -0
skillanalyzer/threats/threats.py +480 -0
skillanalyzer/utils/__init__.py +28 -0
skillanalyzer/utils/command_utils.py +129 -0
skillanalyzer/utils/di_container.py +154 -0
skillanalyzer/utils/file_utils.py +86 -0
skillanalyzer/utils/logging_config.py +96 -0
skillanalyzer/utils/logging_utils.py +71 -0

skillanalyzer/core/analyzers/virustotal_analyzer.py ADDED Viewed

@@ -0,0 +1,463 @@
+# Copyright 2026 Cisco Systems, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""
+VirusTotal analyzer for scanning binary files using hash-based lookups.
+This analyzer checks binary files (images, PDFs, archives, etc.) against
+VirusTotal's database using SHA256 hash lookups. It does NOT scan code files
+like Python, JavaScript, or Markdown files.
+"""
+import hashlib
+import logging
+from pathlib import Path
+import httpx
+from ..models import Finding, Severity, Skill, ThreatCategory
+from .base import BaseAnalyzer
+logger = logging.getLogger(__name__)
+class VirusTotalAnalyzer(BaseAnalyzer):
+    """
+    Analyzer that checks binary files against VirusTotal using hash lookups.
+    Only scans binary file types (images, PDFs, executables, archives).
+    Excludes text-based code files (.py, .js, .md, .txt, .json, .yaml, etc.).
+    """
+    # Binary file extensions to scan
+    BINARY_EXTENSIONS = {
+        # Images
+        ".png",
+        ".jpg",
+        ".jpeg",
+        ".gif",
+        ".bmp",
+        ".ico",
+        ".svg",
+        ".webp",
+        ".tiff",
+        # Documents
+        ".pdf",
+        ".doc",
+        ".docx",
+        ".xls",
+        ".xlsx",
+        ".ppt",
+        ".pptx",
+        # Archives
+        ".zip",
+        ".tar",
+        ".gz",
+        ".bz2",
+        ".7z",
+        ".rar",
+        ".tgz",
+        # Executables
+        ".exe",
+        ".dll",
+        ".so",
+        ".dylib",
+        ".bin",
+        ".com",  # .com = MS-DOS executables
+        # Other binaries
+        ".wasm",
+        ".class",
+        ".jar",
+        ".war",
+    }
+    # Text/code extensions to EXCLUDE from scanning
+    EXCLUDED_EXTENSIONS = {
+        ".py",
+        ".js",
+        ".ts",
+        ".jsx",
+        ".tsx",
+        ".java",
+        ".c",
+        ".cpp",
+        ".h",
+        ".hpp",
+        ".go",
+        ".rs",
+        ".rb",
+        ".php",
+        ".swift",
+        ".kt",
+        ".cs",
+        ".vb",
+        ".md",
+        ".txt",
+        ".json",
+        ".yaml",
+        ".yml",
+        ".toml",
+        ".ini",
+        ".conf",
+        ".cfg",
+        ".xml",
+        ".html",
+        ".css",
+        ".scss",
+        ".sass",
+        ".less",
+        ".sh",
+        ".bash",
+        ".zsh",
+        ".fish",
+        ".ps1",
+        ".bat",
+        ".cmd",
+        ".sql",
+        ".graphql",
+        ".proto",
+        ".thrift",
+        ".rst",
+        ".org",
+        ".adoc",
+        ".tex",
+    }
+    def __init__(self, api_key: str | None = None, enabled: bool = True, upload_files: bool = False):
+        """
+        Initialize VirusTotal analyzer.
+        Args:
+            api_key: VirusTotal API key (optional, can be set via environment)
+            enabled: Whether the analyzer is enabled (default: True)
+            upload_files: If True, upload files to VT for scanning. If False (default),
+                         only check existing hashes (more privacy-friendly)
+        """
+        super().__init__("virustotal_analyzer")
+        self.api_key = api_key
+        self.enabled = enabled and api_key is not None
+        self.upload_files = upload_files
+        self.validated_binary_files = []  # Track files validated as safe by VirusTotal
+        self.base_url = "https://www.virustotal.com/api/v3"
+        self.session = httpx.Client()
+        if not self.api_key:
+            logger.warning("VirusTotal API key is missing!")
+        if self.api_key:
+            self.session.headers.update({"x-apikey": self.api_key, "Accept": "application/json"})
+            logger.info("VirusTotal API key configured (length: %d)", len(self.api_key))
+        else:
+            logger.warning("VirusTotal analyzer initialized without API key")
+    def analyze(self, skill: Skill) -> list[Finding]:
+        """
+        Analyze binary files in the skill using VirusTotal hash lookups.
+        Args:
+            skill: The skill to analyze
+        Returns:
+            List of findings for malicious files. Also stores validated file paths
+            in skill metadata to allow suppression of binary file warnings.
+        """
+        if not self.enabled:
+            return []
+        findings = []
+        validated_files = []  # Track files validated as safe
+        # Only scan binary files
+        binary_files = [f for f in skill.files if self._is_binary_file(f.relative_path)]
+        for skill_file in binary_files:
+            try:
+                file_path = Path(skill.directory) / skill_file.relative_path
+                file_hash = self._calculate_sha256(file_path)
+                logger.info("Checking file: %s (SHA256: %s)", skill_file.relative_path, file_hash)
+                vt_result, hash_found = self._query_virustotal(file_hash)
+                if hash_found:
+                    total = vt_result.get("total_engines", 0)
+                    malicious = vt_result.get("malicious", 0)
+                    suspicious = vt_result.get("suspicious", 0)
+                    if malicious > 0 or suspicious > 0:
+                        logger.warning(
+                            "Found in VT database: %d malicious, %d suspicious out of %d vendors",
+                            malicious,
+                            suspicious,
+                            total,
+                        )
+                    else:
+                        logger.info("Found in VT database: %d/%d vendors flagged (file appears safe)", malicious, total)
+                        validated_files.append(skill_file.relative_path)
+                    if vt_result.get("permalink"):
+                        logger.info("Report: %s", vt_result["permalink"])
+                    if malicious > 0:
+                        findings.append(
+                            self._create_finding(skill_file=skill_file, file_hash=file_hash, vt_result=vt_result)
+                        )
+                elif self.upload_files:
+                    logger.warning("Hash not found in VT database - uploading for analysis")
+                    vt_result = self._upload_and_scan(file_path, file_hash)
+                    if vt_result:
+                        if vt_result.get("malicious", 0) > 0:
+                            findings.append(
+                                self._create_finding(skill_file=skill_file, file_hash=file_hash, vt_result=vt_result)
+                            )
+                        else:
+                            validated_files.append(skill_file.relative_path)
+                else:
+                    logger.warning("Hash not found in VT database - upload disabled, cannot scan unknown file")
+            except Exception as e:
+                logger.warning("VirusTotal scan failed for %s: %s", skill_file.relative_path, e)
+                continue
+        # Store validated files in analyzer instance for post-processing
+        self.validated_binary_files = validated_files
+        return findings
+    def _is_binary_file(self, file_path: str) -> bool:
+        """
+        Check if a file should be scanned (is binary, not code).
+        Args:
+            file_path: Path to the file
+        Returns:
+            True if file should be scanned
+        """
+        path = Path(file_path)
+        ext = path.suffix.lower()
+        # Explicitly exclude text/code files
+        if ext in self.EXCLUDED_EXTENSIONS:
+            return False
+        # Include known binary extensions
+        if ext in self.BINARY_EXTENSIONS:
+            return True
+        # For unknown extensions, default to not scanning
+        # (conservative approach to avoid scanning code files)
+        return False
+    def _calculate_sha256(self, file_path: Path) -> str:
+        """
+        Calculate SHA256 hash of a file.
+        Args:
+            file_path: Path to the file
+        Returns:
+            SHA256 hash as hex string
+        """
+        sha256_hash = hashlib.sha256()
+        with open(file_path, "rb") as f:
+            # Read file in chunks for memory efficiency
+            for byte_block in iter(lambda: f.read(4096), b""):
+                sha256_hash.update(byte_block)
+        return sha256_hash.hexdigest()
+    def _query_virustotal(self, file_hash: str) -> tuple[dict | None, bool]:
+        """
+        Query VirusTotal API for file hash.
+        Args:
+            file_hash: SHA256 hash of the file
+        Returns:
+            Tuple of (detection stats dictionary or None, hash_found boolean)
+            - If hash found: (stats_dict, True)
+            - If hash not found (404): (None, False)
+            - If error: (None, False)
+        """
+        try:
+            response = self.session.get(f"{self.base_url}/files/{file_hash}", timeout=10)
+            if response.status_code == 404:
+                # File hash not in VirusTotal database (never scanned before)
+                return None, False
+            if response.status_code == 200:
+                data = response.json()
+                # Extract detection statistics
+                stats = data.get("data", {}).get("attributes", {}).get("last_analysis_stats", {})
+                # Construct GUI URL (not API endpoint)
+                gui_url = f"https://www.virustotal.com/gui/file/{file_hash}"
+                result = {
+                    "malicious": stats.get("malicious", 0),
+                    "suspicious": stats.get("suspicious", 0),
+                    "undetected": stats.get("undetected", 0),
+                    "harmless": stats.get("harmless", 0),
+                    "total_engines": sum(stats.values()),
+                    "scan_date": data.get("data", {}).get("attributes", {}).get("last_analysis_date"),
+                    "permalink": gui_url,
+                }
+                return result, True
+            if response.status_code == 429:
+                logger.warning("VirusTotal rate limit exceeded. Please wait before retrying.")
+            else:
+                logger.warning("VirusTotal API returned status %d", response.status_code)
+            return None, False
+        except httpx.RequestError as e:
+            logger.warning("VirusTotal API request failed: %s", e)
+            return None, False
+    def _upload_and_scan(self, file_path: Path, file_hash: str) -> dict | None:
+        """
+        Upload file to VirusTotal for scanning.
+        Args:
+            file_path: Path to the file to upload
+            file_hash: SHA256 hash of the file
+        Returns:
+            Dictionary with detection stats or None if upload failed
+        """
+        try:
+            import time
+            file_size = file_path.stat().st_size
+            if file_size > 32 * 1024 * 1024:
+                logger.warning("File too large to upload to VT: %s (%d bytes)", file_path.name, file_size)
+                return None
+            with open(file_path, "rb") as f:
+                files = {"file": (file_path.name, f)}
+                response = self.session.post(f"{self.base_url}/files", files=files, timeout=60)
+            if response.status_code != 200:
+                logger.warning("File upload failed with status %d", response.status_code)
+                return None
+            upload_data = response.json()
+            analysis_id = upload_data.get("data", {}).get("id")
+            if not analysis_id:
+                logger.warning("No analysis ID returned from upload")
+                return None
+            logger.info("File uploaded successfully. Analysis ID: %s", analysis_id)
+            max_retries = 6
+            for attempt in range(max_retries):
+                time.sleep(10)
+                analysis_response = self.session.get(f"{self.base_url}/analyses/{analysis_id}", timeout=10)
+                if analysis_response.status_code == 200:
+                    analysis_data = analysis_response.json()
+                    status = analysis_data.get("data", {}).get("attributes", {}).get("status")
+                    stats = analysis_data.get("data", {}).get("attributes", {}).get("stats", {})
+                    if status == "completed":
+                        result, _ = self._query_virustotal(file_hash)
+                        if result and result.get("total_engines", 0) > 0:
+                            logger.info(
+                                "Analysis complete: %d/%d vendors scanned",
+                                result.get("malicious", 0),
+                                result.get("total_engines", 0),
+                            )
+                            return result
+                    else:
+                        total_scans = sum(stats.values()) if stats else 0
+                        logger.info(
+                            "Status: %s (%d engines scanned, attempt %d/%d)",
+                            status,
+                            total_scans,
+                            attempt + 1,
+                            max_retries,
+                        )
+                else:
+                    logger.warning("Analysis query failed with status %d", analysis_response.status_code)
+            logger.warning("Analysis still processing after %d seconds", max_retries * 10)
+            result, _ = self._query_virustotal(file_hash)
+            return result
+        except httpx.RequestError as e:
+            logger.warning("File upload to VirusTotal failed: %s", e)
+            return None
+        except Exception as e:
+            logger.warning("Unexpected error during file upload: %s", e)
+            return None
+    def _create_finding(self, skill_file, file_hash: str, vt_result: dict) -> Finding:
+        """
+        Create a finding for a malicious file.
+        Args:
+            skill_file: The SkillFile object
+            file_hash: SHA256 hash of the file
+            vt_result: VirusTotal scan results
+        Returns:
+            Finding object
+        """
+        malicious_count = vt_result.get("malicious", 0)
+        total_engines = vt_result.get("total_engines", 0)
+        # Determine severity based on detection ratio
+        if total_engines > 0:
+            detection_ratio = malicious_count / total_engines
+            if detection_ratio >= 0.3:  # 30%+ detection rate
+                severity = Severity.CRITICAL
+            elif detection_ratio >= 0.1:  # 10-30% detection rate
+                severity = Severity.HIGH
+            else:
+                severity = Severity.MEDIUM
+        else:
+            severity = Severity.MEDIUM
+        return Finding(
+            id=f"VT_{file_hash[:8]}",
+            rule_id="VIRUSTOTAL_MALICIOUS_FILE",
+            category=ThreatCategory.MALWARE,
+            severity=severity,
+            title=f"Malicious file detected: {skill_file.relative_path}",
+            description=(
+                f"VirusTotal detected this file as malicious. "
+                f"{malicious_count}/{total_engines} security vendors flagged this file. "
+                f"SHA256: {file_hash}"
+            ),
+            file_path=skill_file.relative_path,
+            line_number=None,
+            snippet=f"File hash: {file_hash}",
+            remediation=(
+                "Remove this file from the skill package. "
+                "Binary files flagged by multiple antivirus engines should not be included."
+            ),
+            analyzer="virustotal",
+            metadata={
+                "confidence": 0.95 if malicious_count >= 5 else 0.8,
+                "references": [f"https://www.virustotal.com/gui/file/{file_hash}"],
+                "file_hash": file_hash,
+            },
+        )

skillanalyzer/core/exceptions.py ADDED Viewed

@@ -0,0 +1,77 @@
+# Copyright 2026 Cisco Systems, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""Skill Analyzer exceptions.
+This module defines custom exceptions for Skill Analyzer operations.
+All exceptions inherit from SkillAnalyzerError for easy catching.
+Example:
+    >>> from skillanalyzer import Scanner
+    >>> from skillanalyzer.core.exceptions import SkillAnalysisError, SkillLoadError
+    >>>
+    >>> scanner = Scanner()
+    >>>
+    >>> try:
+    ...     skill = scanner.load_skill("path/to/skill")
+    ... except SkillLoadError as e:
+    ...     print(f"Failed to load skill: {e}")
+    ... except SkillAnalysisError as e:
+    ...     print(f"Analysis failed: {e}")
+"""
+class SkillAnalyzerError(Exception):
+    """Base exception for all Skill Analyzer errors."""
+    pass
+class SkillLoadError(SkillAnalyzerError):
+    """Raised when unable to load a skill package.
+    This can indicate:
+    - Missing SKILL.md file
+    - Invalid YAML frontmatter
+    - Corrupted skill package
+    - File system errors
+    """
+    pass
+class SkillAnalysisError(SkillAnalyzerError):
+    """Raised when skill analysis fails.
+    This typically indicates:
+    - Analyzer configuration errors
+    - Internal analysis errors
+    - Resource exhaustion during analysis
+    """
+    pass
+class SkillValidationError(SkillAnalyzerError):
+    """Raised when skill validation fails.
+    This indicates:
+    - Invalid skill manifest
+    - Missing required fields
+    - Invalid skill structure
+    """
+    pass