PyPI - cisco-ai-skill-scanner - Versions diffs - 1.0.0__py3-none-any.whl - Mend

cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (100) hide show

cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
skillanalyzer/__init__.py +45 -0
skillanalyzer/_version.py +34 -0
skillanalyzer/api/__init__.py +25 -0
skillanalyzer/api/api.py +34 -0
skillanalyzer/api/api_cli.py +78 -0
skillanalyzer/api/api_server.py +634 -0
skillanalyzer/api/router.py +527 -0
skillanalyzer/cli/__init__.py +25 -0
skillanalyzer/cli/cli.py +816 -0
skillanalyzer/config/__init__.py +26 -0
skillanalyzer/config/config.py +149 -0
skillanalyzer/config/config_parser.py +122 -0
skillanalyzer/config/constants.py +85 -0
skillanalyzer/core/__init__.py +24 -0
skillanalyzer/core/analyzers/__init__.py +75 -0
skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
skillanalyzer/core/analyzers/base.py +53 -0
skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
skillanalyzer/core/analyzers/static.py +1105 -0
skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
skillanalyzer/core/exceptions.py +77 -0
skillanalyzer/core/loader.py +377 -0
skillanalyzer/core/models.py +300 -0
skillanalyzer/core/reporters/__init__.py +26 -0
skillanalyzer/core/reporters/json_reporter.py +65 -0
skillanalyzer/core/reporters/markdown_reporter.py +209 -0
skillanalyzer/core/reporters/sarif_reporter.py +246 -0
skillanalyzer/core/reporters/table_reporter.py +195 -0
skillanalyzer/core/rules/__init__.py +19 -0
skillanalyzer/core/rules/patterns.py +165 -0
skillanalyzer/core/rules/yara_scanner.py +157 -0
skillanalyzer/core/scanner.py +437 -0
skillanalyzer/core/static_analysis/__init__.py +27 -0
skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
skillanalyzer/core/static_analysis/context_extractor.py +742 -0
skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
skillanalyzer/core/static_analysis/types/__init__.py +36 -0
skillanalyzer/data/__init__.py +30 -0
skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
skillanalyzer/data/prompts/llm_response_schema.json +71 -0
skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
skillanalyzer/data/prompts/unified_response_schema.md +97 -0
skillanalyzer/data/rules/signatures.yaml +440 -0
skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
skillanalyzer/data/yara_rules/code_execution.yara +61 -0
skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
skillanalyzer/data/yara_rules/command_injection.yara +54 -0
skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
skillanalyzer/data/yara_rules/script_injection.yara +83 -0
skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
skillanalyzer/hooks/__init__.py +21 -0
skillanalyzer/hooks/pre_commit.py +450 -0
skillanalyzer/threats/__init__.py +25 -0
skillanalyzer/threats/threats.py +480 -0
skillanalyzer/utils/__init__.py +28 -0
skillanalyzer/utils/command_utils.py +129 -0
skillanalyzer/utils/di_container.py +154 -0
skillanalyzer/utils/file_utils.py +86 -0
skillanalyzer/utils/logging_config.py +96 -0
skillanalyzer/utils/logging_utils.py +71 -0

skillanalyzer/core/analyzers/llm_prompt_builder.py ADDED Viewed

@@ -0,0 +1,270 @@
+# Copyright 2026 Cisco Systems, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""
+LLM Prompt Builder.
+Handles prompt construction with injection protection using random delimiters.
+"""
+import secrets
+from pathlib import Path
+from ...core.models import Skill
+class PromptBuilder:
+    """Builds analysis prompts with injection protection."""
+    def __init__(self):
+        """Initialize prompt builder and load prompts."""
+        self.protection_rules = ""
+        self.threat_analysis_prompt = ""
+        self._load_prompts()
+    def _load_prompts(self):
+        """Load analysis prompts from markdown files."""
+        prompts_dir = Path(__file__).parent.parent.parent / "data" / "prompts"
+        try:
+            protection_file = prompts_dir / "boilerplate_protection_rule_prompt.md"
+            threat_file = prompts_dir / "skill_threat_analysis_prompt.md"
+            if protection_file.exists():
+                self.protection_rules = protection_file.read_text(encoding="utf-8")
+            else:
+                print(f"Warning: Protection rules file not found at {protection_file}")
+                self.protection_rules = "You are a security analyst analyzing Claude Skills."
+            if threat_file.exists():
+                self.threat_analysis_prompt = threat_file.read_text(encoding="utf-8")
+            else:
+                print(f"Warning: Threat analysis prompt not found at {threat_file}")
+                self.threat_analysis_prompt = "Analyze for security threats."
+        except Exception as e:
+            print(f"Warning: Failed to load prompts: {e}")
+            self.protection_rules = "You are a security analyst analyzing Claude Skills."
+            self.threat_analysis_prompt = "Analyze for security threats."
+    def build_threat_analysis_prompt(
+        self,
+        skill_name: str,
+        description: str,
+        manifest_details: str,
+        instruction_body: str,
+        code_files: str,
+        referenced_files: str,
+    ) -> tuple[str, bool]:
+        """
+        Create threat analysis prompt with prompt injection protection.
+        Uses random delimiter tags to prevent prompt injection attacks.
+        Args:
+            skill_name: Name of the skill
+            description: Skill description
+            manifest_details: YAML manifest details
+            instruction_body: SKILL.md content
+            code_files: Formatted code files
+            referenced_files: Referenced files
+        Returns:
+            Tuple of (prompt, injection_detected)
+        """
+        # Generate random delimiter tags
+        random_id = secrets.token_hex(16)
+        start_tag = f"<!---UNTRUSTED_INPUT_START_{random_id}--->"
+        end_tag = f"<!---UNTRUSTED_INPUT_END_{random_id}--->"
+        # Build comprehensive analysis content
+        analysis_content = f"""Skill Name: {skill_name}
+Description: {description}
+YAML Manifest Details:
+{manifest_details}
+Instruction Body (SKILL.md markdown):
+{instruction_body}
+Script Files (Python/Bash):
+{code_files}
+Referenced Files:
+{referenced_files}
+"""
+        # Check for delimiter injection (security violation)
+        injection_detected = start_tag in analysis_content or end_tag in analysis_content
+        if injection_detected:
+            print(f"WARNING: Potential prompt injection detected in skill {skill_name}")
+        # Replace placeholders with random tags
+        protected_rules = self.protection_rules.replace("<!---UNTRUSTED_INPUT_START--->", start_tag).replace(
+            "<!---UNTRUSTED_INPUT_END--->", end_tag
+        )
+        # Construct full prompt
+        prompt = f"""{protected_rules}
+{self.threat_analysis_prompt}
+{start_tag}
+{analysis_content}
+{end_tag}
+"""
+        return prompt.strip(), injection_detected
+    def format_manifest(self, manifest) -> str:
+        """Format YAML manifest for LLM analysis."""
+        lines = []
+        lines.append(f"- name: {manifest.name}")
+        lines.append(f"- description: {manifest.description}")
+        lines.append(f"- license: {manifest.license or 'Not specified'}")
+        lines.append(f"- compatibility: {manifest.compatibility or 'Not specified'}")
+        lines.append(
+            f"- allowed-tools: {', '.join(manifest.allowed_tools) if manifest.allowed_tools else 'Not specified'}"
+        )
+        if manifest.metadata:
+            lines.append(f"- additional metadata: {manifest.metadata}")
+        return "\n".join(lines)
+    def format_code_files(self, skill: Skill) -> str:
+        """Format code files for LLM analysis."""
+        lines = []
+        for skill_file in skill.get_scripts():
+            content = skill_file.read_content()
+            if content:
+                truncated = content[:1500]
+                if len(content) > 1500:
+                    truncated += f"\n... (truncated, total {len(content)} chars)"
+                lines.append(f"**File: {skill_file.relative_path}**")
+                lines.append("```" + skill_file.file_type)
+                lines.append(truncated)
+                lines.append("```")
+                lines.append("")
+        return "\n".join(lines) if lines else "No script files found."
+    def _is_path_within_directory(self, path: Path, directory: Path) -> bool:
+        """
+        Check if a path is within a directory (prevents path traversal attacks).
+        Args:
+            path: The path to check (will be resolved)
+            directory: The directory that should contain the path
+        Returns:
+            True if the path is within the directory, False otherwise
+        """
+        try:
+            # Resolve both paths to absolute paths, resolving symlinks
+            resolved_path = path.resolve()
+            resolved_directory = directory.resolve()
+            # Check if the resolved path starts with the directory path
+            # Using os.path.commonpath is more robust than string comparison
+            return resolved_path.is_relative_to(resolved_directory)
+        except (ValueError, OSError):
+            # is_relative_to raises ValueError if paths are on different drives (Windows)
+            # or other path resolution issues
+            return False
+    def format_referenced_files(self, skill: Skill, max_file_size: int = 2000) -> str:
+        """
+        Format referenced files for LLM analysis, including their content.
+        This is critical for detecting hidden malicious payloads in referenced
+        instruction files (e.g., rules/logic.md containing curl commands).
+        SECURITY: Only reads files within the skill directory to prevent
+        path traversal attacks (e.g., ../../../.env exfiltration).
+        Args:
+            skill: The skill being analyzed
+            max_file_size: Maximum characters to include per file (default 2000)
+        Returns:
+            Formatted string with referenced file contents
+        """
+        if not skill.referenced_files:
+            return "No referenced files."
+        lines = []
+        lines.append(f"Files referenced in instructions: {', '.join(skill.referenced_files)}")
+        lines.append("")
+        for ref_file_path in skill.referenced_files:
+            # Skip paths that look like path traversal attempts
+            if ".." in ref_file_path or ref_file_path.startswith("/"):
+                lines.append(f"**Referenced File: {ref_file_path}** (blocked: path traversal attempt)")
+                lines.append("")
+                continue
+            # Try to find the file in the skill directory
+            full_path = skill.directory / ref_file_path
+            if not full_path.exists():
+                # Try alternative locations (all within skill directory)
+                alt_paths = [
+                    skill.directory / "rules" / Path(ref_file_path).name,
+                    skill.directory / "references" / ref_file_path,
+                    skill.directory / "assets" / ref_file_path,
+                    skill.directory / "templates" / ref_file_path,
+                ]
+                for alt in alt_paths:
+                    if alt.exists():
+                        full_path = alt
+                        break
+            if not full_path.exists():
+                lines.append(f"**Referenced File: {ref_file_path}** (not found)")
+                lines.append("")
+                continue
+            # SECURITY: Verify the resolved path is within the skill directory
+            # This prevents path traversal attacks like ../../../.env
+            if not self._is_path_within_directory(full_path, skill.directory):
+                lines.append(f"**Referenced File: {ref_file_path}** (blocked: outside skill directory)")
+                lines.append("")
+                continue
+            try:
+                content = full_path.read_text(encoding="utf-8")
+                # Truncate if too large
+                truncated = content[:max_file_size]
+                if len(content) > max_file_size:
+                    truncated += f"\n... (truncated, total {len(content)} chars)"
+                # Determine file type for syntax highlighting
+                suffix = full_path.suffix.lower()
+                file_type = "markdown" if suffix in (".md", ".markdown") else "text"
+                lines.append(f"**Referenced File: {ref_file_path}**")
+                lines.append(f"```{file_type}")
+                lines.append(truncated)
+                lines.append("```")
+                lines.append("")
+            except Exception as e:
+                lines.append(f"**Referenced File: {ref_file_path}** (error reading: {e})")
+                lines.append("")
+        return "\n".join(lines)

skillanalyzer/core/analyzers/llm_provider_config.py ADDED Viewed

@@ -0,0 +1,215 @@
+# Copyright 2026 Cisco Systems, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""
+LLM Provider Configuration Handler.
+Handles detection and configuration of different LLM providers
+(Anthropic, OpenAI, Azure, Bedrock, Gemini).
+"""
+import importlib.util
+import os
+# Check for Google GenAI availability
+# Wrap in try/except because find_spec can raise ModuleNotFoundError
+# if the google namespace package is in a broken state
+try:
+    GOOGLE_GENAI_AVAILABLE = importlib.util.find_spec("google.genai") is not None
+except (ImportError, ModuleNotFoundError):
+    GOOGLE_GENAI_AVAILABLE = False
+# Check for LiteLLM availability
+try:
+    LITELLM_AVAILABLE = importlib.util.find_spec("litellm") is not None
+except (ImportError, ModuleNotFoundError):
+    LITELLM_AVAILABLE = False
+class ProviderConfig:
+    """Handles LLM provider detection and configuration."""
+    def __init__(
+        self,
+        model: str,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        api_version: str | None = None,
+        aws_region: str | None = None,
+        aws_profile: str | None = None,
+        aws_session_token: str | None = None,
+    ):
+        """
+        Initialize provider configuration.
+        Args:
+            model: Model identifier
+            api_key: API key (if None, reads from environment)
+            base_url: Custom base URL (for Azure)
+            api_version: API version (for Azure)
+            aws_region: AWS region (for Bedrock)
+            aws_profile: AWS profile name (for Bedrock)
+            aws_session_token: AWS session token (for Bedrock)
+        """
+        self.model = model
+        self.base_url = base_url
+        self.api_version = api_version
+        self.aws_region = aws_region or os.getenv("AWS_REGION", "us-east-1")
+        self.aws_profile = aws_profile or os.getenv("AWS_PROFILE")
+        self.aws_session_token = aws_session_token or os.getenv("AWS_SESSION_TOKEN")
+        # Detect provider type from model string
+        model_lower = model.lower()
+        self.is_bedrock = "bedrock/" in model or model_lower.startswith("bedrock/")
+        self.is_gemini = "gemini" in model_lower or model_lower.startswith("gemini/")
+        self.is_azure = model_lower.startswith("azure/") or "azure" in model_lower
+        self.is_vertex = model_lower.startswith("vertex_ai/") or "vertex" in model_lower
+        self.is_ollama = model_lower.startswith("ollama/")
+        self.is_openrouter = model_lower.startswith("openrouter/")
+        # Determine if we should use Google SDK
+        self.use_google_sdk = False
+        # Handle Vertex AI separately (uses LiteLLM, not Google SDK)
+        if self.is_vertex:
+            # Vertex AI models stay as-is for LiteLLM
+            if not LITELLM_AVAILABLE:
+                raise ImportError("LiteLLM is required for Vertex AI. Install with: pip install litellm")
+            self.model = model  # Keep vertex_ai/ prefix for LiteLLM
+        elif self.is_gemini and GOOGLE_GENAI_AVAILABLE:
+            # Google AI Studio (uses Google SDK directly)
+            self.use_google_sdk = True
+            self.model = self._normalize_gemini_model_name(model)
+        elif self.is_gemini and not GOOGLE_GENAI_AVAILABLE:
+            raise ImportError(
+                "For Gemini models, either LiteLLM or google-genai is required. "
+                "Install with: pip install litellm or pip install google-genai"
+            )
+        elif not LITELLM_AVAILABLE:
+            raise ImportError("LiteLLM is required for enhanced LLM analyzer. Install with: pip install litellm")
+        else:
+            # Normalize Gemini model name for LiteLLM (Google AI Studio via LiteLLM)
+            if self.is_gemini and not model.startswith("gemini/"):
+                model_name = model.replace("gemini-", "").replace("gemini/", "")
+                self.model = f"gemini/{model_name}"
+            else:
+                self.model = model
+        # Resolve API key
+        self.api_key = self._resolve_api_key(api_key)
+        # Note: Google SDK client is created per-request, not configured globally
+    def _resolve_api_key(self, api_key: str | None) -> str | None:
+        """Resolve API key from parameter or environment variables.
+        Uses SKILL_SCANNER_LLM_API_KEY consistently for all providers.
+        Special cases:
+        - Vertex AI: Uses GOOGLE_APPLICATION_CREDENTIALS (service account)
+        - Ollama: No API key needed (local)
+        """
+        if api_key is not None:
+            return api_key
+        # Special cases with different auth mechanisms
+        if self.is_vertex:
+            # Vertex AI uses Google Cloud service account credentials
+            return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+        elif self.is_ollama:
+            # Ollama is local and typically doesn't need API key
+            return None
+        # All providers (including Bedrock, Gemini, OpenAI, Anthropic, Azure):
+        # Use SKILL_SCANNER_LLM_API_KEY
+        return os.getenv("SKILL_SCANNER_LLM_API_KEY")
+    def _normalize_gemini_model_name(self, model: str) -> str:
+        """
+        Normalize Gemini model name for Google GenAI SDK (new SDK).
+        Handles various input formats:
+        - gemini-1.5-pro -> models/gemini-1.5-pro (or models/gemini-pro-latest)
+        - gemini-2.5-flash -> models/gemini-2.5-flash
+        - gemini/2.0-flash -> models/gemini-2.0-flash
+        - models/gemini-2.5-pro -> models/gemini-2.5-pro (already correct)
+        Args:
+            model: Input model name
+        Returns:
+            Normalized model name for Google SDK (with models/ prefix)
+        """
+        # Remove any "gemini/" prefix (LiteLLM format)
+        model_name = model.replace("gemini/", "")
+        # Remove models/ prefix if present (will add it back)
+        model_name = model_name.replace("models/", "")
+        # Map legacy model names to available models
+        model_mapping = {
+            "gemini-1.5-pro": "gemini-pro-latest",  # Map to latest available
+            "gemini-1.5-flash": "gemini-flash-latest",  # Map to latest available
+        }
+        if model_name in model_mapping:
+            model_name = model_mapping[model_name]
+        # If it's just a version/variant, add "gemini-" prefix
+        if not model_name.startswith("gemini-"):
+            model_name = f"gemini-{model_name}"
+        # Add models/ prefix for new SDK
+        if not model_name.startswith("models/"):
+            model_name = f"models/{model_name}"
+        return model_name
+    def validate(self) -> None:
+        """Validate that configuration is complete."""
+        if not self.is_bedrock and not self.api_key:
+            raise ValueError(f"API key required for model {self.model}")
+    def get_request_params(self) -> dict:
+        """Get request parameters for LiteLLM."""
+        params = {}
+        if self.api_key:
+            if self.is_gemini:
+                # For Google AI Studio, LiteLLM uses GEMINI_API_KEY environment variable
+                if not os.getenv("GEMINI_API_KEY"):
+                    os.environ["GEMINI_API_KEY"] = self.api_key
+            else:
+                # Pass api_key for all providers including Bedrock (bearer token auth)
+                params["api_key"] = self.api_key
+        if self.base_url:
+            params["api_base"] = self.base_url
+        if self.api_version:
+            params["api_version"] = self.api_version
+        if self.is_bedrock:
+            # AWS Bedrock supports:
+            # 1. Bearer token auth via api_key (format: bedrock-api-key-*)
+            # 2. IAM credentials via boto3 (falls back if no bearer token)
+            if self.aws_region:
+                params["aws_region_name"] = self.aws_region
+            if self.aws_session_token:
+                params["aws_session_token"] = self.aws_session_token
+            if self.aws_profile:
+                params["aws_profile_name"] = self.aws_profile
+        return params