cisco_ai_skill_scanner-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
skillanalyzer/core/analyzers/base.py
@@ -0,0 +1,53 @@
+ # Copyright 2026 Cisco Systems, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ """
+ Base analyzer interface for skill security scanning.
+ """
+
+ from abc import ABC, abstractmethod
+
+ from ..models import Finding, Skill
+
+
+ class BaseAnalyzer(ABC):
+     """Abstract base class for all security analyzers."""
+
+     def __init__(self, name: str):
+         """
+         Initialize analyzer.
+
+         Args:
+             name: Name of the analyzer
+         """
+         self.name = name
+
+     @abstractmethod
+     def analyze(self, skill: Skill) -> list[Finding]:
+         """
+         Analyze a skill for security issues.
+
+         Args:
+             skill: The skill to analyze
+
+         Returns:
+             List of security findings
+         """
+         pass
+
+     def get_name(self) -> str:
+         """Get the analyzer name."""
+         return self.name
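BaseAnalyzer above is the contract the package's concrete analyzers (static, YARA, LLM, behavioral, etc.) build on: subclass it, pass a name to __init__, and return a list of Finding objects from analyze(). The following is a minimal third-party sketch only; the Finding keyword arguments and the skill.files attribute are hypothetical stand-ins, since the actual fields in skillanalyzer/core/models.py are not part of this excerpt.

# Hypothetical sketch of a custom analyzer built on BaseAnalyzer.
# The Finding kwargs and `skill.files` are assumed names, not the real
# fields defined in skillanalyzer/core/models.py.
from skillanalyzer.core.analyzers.base import BaseAnalyzer
from skillanalyzer.core.models import Finding, Skill


class HardcodedTokenAnalyzer(BaseAnalyzer):
    """Flags skill files that appear to embed bearer tokens."""

    def __init__(self):
        super().__init__(name="hardcoded-token")

    def analyze(self, skill: Skill) -> list[Finding]:
        findings: list[Finding] = []
        for path, text in getattr(skill, "files", {}).items():
            if "Bearer " in text:
                findings.append(
                    Finding(  # illustrative fields only
                        analyzer=self.get_name(),
                        severity="HIGH",
                        message=f"Possible hardcoded bearer token in {path}",
                    )
                )
        return findings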
skillanalyzer/core/analyzers/behavioral/__init__.py
@@ -0,0 +1,30 @@
+ # Copyright 2026 Cisco Systems, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Behavioral Analysis Package for Claude Skills.
+
+ This package provides enhanced behavioral analysis capabilities including:
+ - Alignment verification between skill description and code behavior
+ - LLM-powered semantic analysis
+ - Cross-file analysis and call graph tracking
+
+ Components:
+ - alignment/: Alignment verification layer for skill description vs code
+ """
+
+ from .alignment import AlignmentOrchestrator
+
+ __all__ = ["AlignmentOrchestrator"]
skillanalyzer/core/analyzers/behavioral/alignment/__init__.py
@@ -0,0 +1,45 @@
+ # Copyright 2026 Cisco Systems, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Alignment Verification Layer for Semantic Analysis.
+
+ This package provides the alignment verification layer that checks if skill
+ descriptions and manifest claims accurately match their actual implementation behavior.
+
+ Components:
+ - AlignmentOrchestrator: Main orchestrator coordinating alignment verification
+ - AlignmentPromptBuilder: Constructs comprehensive prompts with evidence
+ - AlignmentLLMClient: Handles LLM API interaction for verification
+ - AlignmentResponseValidator: Validates and parses LLM responses
+ - ThreatVulnerabilityClassifier: Classifies findings as threats or vulnerabilities
+
+ All components use the 'alignment_' prefix to indicate they are part of
+ the semantic alignment verification layer.
+ """
+
+ from .alignment_llm_client import AlignmentLLMClient
+ from .alignment_orchestrator import AlignmentOrchestrator
+ from .alignment_prompt_builder import AlignmentPromptBuilder
+ from .alignment_response_validator import AlignmentResponseValidator
+ from .threat_vulnerability_classifier import ThreatVulnerabilityClassifier
+
+ __all__ = [
+     "AlignmentOrchestrator",
+     "AlignmentPromptBuilder",
+     "AlignmentLLMClient",
+     "AlignmentResponseValidator",
+     "ThreatVulnerabilityClassifier",
+ ]
skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py
@@ -0,0 +1,240 @@
+ # Copyright 2026 Cisco Systems, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Alignment LLM Client for Semantic Verification.
+
+ This module handles all LLM API interactions specifically for semantic alignment
+ verification between skill descriptions and their implementation.
+
+ The client manages:
+ - LLM configuration (API keys, endpoints, models)
+ - Request construction for alignment verification
+ - API communication via litellm
+ - Response retrieval with retry logic
+ """
+
+ import asyncio
+ import logging
+ import os
+
+ try:
+     from litellm import acompletion
+
+     LITELLM_AVAILABLE = True
+ except (ImportError, ModuleNotFoundError):
+     LITELLM_AVAILABLE = False
+     acompletion = None
+
+
+ class AlignmentLLMClient:
+     """LLM client for semantic alignment verification queries.
+
+     Handles communication with LLM providers (OpenAI, Azure, Gemini, Bedrock, etc.)
+     specifically for alignment verification tasks.
+
+     Uses litellm for unified interface across providers and per-request
+     parameter passing to avoid configuration conflicts.
+     """
+
+     # Default configuration
+     DEFAULT_MAX_RETRIES = 3
+     DEFAULT_RETRY_BASE_DELAY = 2
+     PROMPT_LENGTH_THRESHOLD = 50000  # Warn if prompt exceeds this
+
+     def __init__(
+         self,
+         model: str = "gemini/gemini-2.0-flash",
+         api_key: str | None = None,
+         base_url: str | None = None,
+         api_version: str | None = None,
+         temperature: float = 0.1,
+         max_tokens: int = 4096,
+         timeout: int = 120,
+     ):
+         """Initialize the alignment LLM client.
+
+         Args:
+             model: LLM model to use
+             api_key: API key (or resolved from environment)
+             base_url: Optional base URL for API
+             api_version: Optional API version
+             temperature: Temperature for responses
+             max_tokens: Max tokens for responses
+             timeout: Request timeout in seconds
+
+         Raises:
+             ImportError: If litellm is not available
+             ValueError: If API key is not provided
+         """
+         if not LITELLM_AVAILABLE:
+             raise ImportError("litellm is required for alignment verification. Install with: pip install litellm")
+
+         # Resolve API key from environment if not provided
+         self._api_key = api_key or self._resolve_api_key(model)
+         if not self._api_key and not self._is_bedrock_model(model):
+             raise ValueError("LLM provider API key is required for alignment verification")
+
+         # Store configuration for per-request usage
+         self._model = model
+         self._base_url = base_url
+         self._api_version = api_version
+         self._temperature = temperature
+         self._max_tokens = max_tokens
+         self._timeout = timeout
+
+         self.logger = logging.getLogger(__name__)
+         self.logger.debug(f"AlignmentLLMClient initialized with model: {self._model}")
+
+     def _resolve_api_key(self, model: str) -> str | None:
+         """Resolve API key from environment variables.
+
+         Args:
+             model: Model name to determine provider
+
+         Returns:
+             API key or None
+         """
+         model_lower = model.lower()
+
+         # Special cases with different auth mechanisms
+         if "vertex" in model_lower:
+             # Vertex AI uses Google Cloud service account credentials
+             return os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
+         elif "ollama" in model_lower:
+             # Ollama is local and typically doesn't need API key
+             return None
+
+         # All providers (including Bedrock, Gemini, OpenAI, Anthropic, Azure):
+         # Use SKILL_SCANNER_LLM_API_KEY
+         return os.environ.get("SKILL_SCANNER_LLM_API_KEY")
+
+     def _is_bedrock_model(self, model: str) -> bool:
+         """Check if model is AWS Bedrock.
+
+         Args:
+             model: Model name
+
+         Returns:
+             True if Bedrock model
+         """
+         return "bedrock" in model.lower()
+
+     async def verify_alignment(self, prompt: str) -> str:
+         """Send alignment verification prompt to LLM with retry logic.
+
+         Args:
+             prompt: Comprehensive prompt with alignment verification evidence
+
+         Returns:
+             LLM response (JSON string)
+
+         Raises:
+             Exception: If LLM API call fails after retries
+         """
+         # Log prompt length for debugging
+         prompt_length = len(prompt)
+         self.logger.debug(f"Prompt length: {prompt_length} characters")
+
+         # Check against threshold
+         if prompt_length > self.PROMPT_LENGTH_THRESHOLD:
+             self.logger.warning(
+                 f"Large prompt detected: {prompt_length} characters "
+                 f"(threshold: {self.PROMPT_LENGTH_THRESHOLD}) - may be truncated by LLM"
+             )
+
+         # Retry logic with exponential backoff
+         max_retries = self.DEFAULT_MAX_RETRIES
+         base_delay = self.DEFAULT_RETRY_BASE_DELAY
+
+         for attempt in range(max_retries):
+             try:
+                 return await self._make_llm_request(prompt)
+             except Exception as e:
+                 if attempt < max_retries - 1:
+                     delay = base_delay * (2**attempt)
+                     self.logger.warning(
+                         f"LLM request failed (attempt {attempt + 1}/{max_retries}): {e}. Retrying in {delay}s..."
+                     )
+                     await asyncio.sleep(delay)
+                 else:
+                     self.logger.error(f"LLM request failed after {max_retries} attempts: {e}")
+                     raise
+
+     async def _make_llm_request(self, prompt: str) -> str:
+         """Make a single LLM API request.
+
+         Args:
+             prompt: Prompt to send
+
+         Returns:
+             LLM response content
+
+         Raises:
+             Exception: If API call fails
+         """
+         try:
+             request_params = {
+                 "model": self._model,
+                 "messages": [
+                     {
+                         "role": "system",
+                         "content": (
+                             "You are a security expert analyzing Claude Skills. "
+                             "You receive complete dataflow analysis and code context. "
+                             "Analyze if the skill description accurately describes what the code actually does. "
+                             "Respond ONLY with valid JSON. Do not include any markdown formatting or code blocks."
+                         ),
+                     },
+                     {"role": "user", "content": prompt},
+                 ],
+                 "max_tokens": self._max_tokens,
+                 "temperature": self._temperature,
+                 "timeout": self._timeout,
+             }
+
+             # Add API key if available
+             if self._api_key:
+                 request_params["api_key"] = self._api_key
+
+             # Only enable JSON mode for supported models/providers
+             # Azure OpenAI with older API versions may not support this
+             if not self._model.startswith("azure/"):
+                 request_params["response_format"] = {"type": "json_object"}
+
+             # Add optional parameters if configured
+             if self._base_url:
+                 request_params["api_base"] = self._base_url
+             if self._api_version:
+                 request_params["api_version"] = self._api_version
+
+             self.logger.debug(f"Sending alignment verification request to {self._model}")
+             response = await acompletion(**request_params)
+
+             # Extract content from response
+             content = response.choices[0].message.content
+
+             # Log response for debugging
+             if not content or not content.strip():
+                 self.logger.warning(f"Empty response from LLM model {self._model}")
+                 self.logger.debug(f"Full response object: {response}")
+             else:
+                 self.logger.debug(f"LLM response length: {len(content)} chars")
+
+             return content if content else ""
+
+         except Exception as e:
+             self.logger.error(f"LLM alignment verification failed: {e}", exc_info=True)
+             raise
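verify_alignment() retries a failed call with exponential backoff (with the defaults above: waits of 2 s and then 4 s across three attempts) and delegates each attempt to litellm's acompletion. A minimal usage sketch follows, assuming SKILL_SCANNER_LLM_API_KEY is exported as _resolve_api_key expects; the prompt text is a placeholder standing in for what AlignmentPromptBuilder would normally produce.

# Minimal sketch: calling the client directly, outside the scanner pipeline.
import asyncio

from skillanalyzer.core.analyzers.behavioral.alignment import AlignmentLLMClient


async def main() -> None:
    # Relies on SKILL_SCANNER_LLM_API_KEY being set in the environment.
    client = AlignmentLLMClient(model="gemini/gemini-2.0-flash", temperature=0.1)

    # Placeholder prompt; in the scanner this comes from AlignmentPromptBuilder.
    prompt = (
        "Skill description: reads a CSV and prints summary statistics.\n"
        "Observed dataflow: opens ~/.aws/credentials and POSTs it to an external URL.\n"
        "Does the implementation match the description? Respond in JSON."
    )
    raw_json = await client.verify_alignment(prompt)
    print(raw_json)  # JSON string; AlignmentResponseValidator normally parses this


asyncio.run(main())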
skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py
@@ -0,0 +1,216 @@
+ # Copyright 2026 Cisco Systems, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Alignment Orchestrator - Main Coordinator for Skill Analysis.
+
+ This module provides the main orchestrator for semantic alignment verification
+ between skill descriptions/manifest claims and actual code implementation.
+
+ The orchestrator coordinates:
+ 1. Building comprehensive prompts with evidence
+ 2. Querying LLM for alignment verification
+ 3. Validating and parsing responses
+ 4. Creating security findings for mismatches
+ """
+
+ import logging
+ from typing import Any
+
+ from .....core.static_analysis.context_extractor import SkillFunctionContext
+ from .alignment_llm_client import AlignmentLLMClient
+ from .alignment_prompt_builder import AlignmentPromptBuilder
+ from .alignment_response_validator import AlignmentResponseValidator
+ from .threat_vulnerability_classifier import ThreatVulnerabilityClassifier
+
+
+ class AlignmentOrchestrator:
+     """Orchestrates semantic alignment verification between skill descriptions and code.
+
+     This is the main alignment verification layer that coordinates:
+     - Prompt building with comprehensive evidence
+     - LLM-based alignment verification
+     - Response validation and finding creation
+     - Threat vs vulnerability classification
+
+     This class provides a clean interface for alignment checking and hides
+     the complexity of prompt construction, LLM interaction, and parsing.
+     """
+
+     def __init__(
+         self,
+         llm_model: str = "gemini/gemini-2.0-flash",
+         llm_api_key: str | None = None,
+         llm_base_url: str | None = None,
+         llm_temperature: float = 0.1,
+         llm_max_tokens: int = 4096,
+         llm_timeout: int = 120,
+     ):
+         """Initialize alignment orchestrator.
+
+         Args:
+             llm_model: LLM model to use (e.g., "gemini/gemini-2.0-flash")
+             llm_api_key: API key for the LLM provider
+             llm_base_url: Optional base URL for LLM API
+             llm_temperature: Temperature for LLM responses
+             llm_max_tokens: Max tokens for LLM responses
+             llm_timeout: Timeout for LLM requests in seconds
+
+         Raises:
+             ValueError: If LLM API key is not provided
+         """
+         self.logger = logging.getLogger(__name__)
+
+         # Initialize alignment verification components
+         self.prompt_builder = AlignmentPromptBuilder()
+         self.llm_client = AlignmentLLMClient(
+             model=llm_model,
+             api_key=llm_api_key,
+             base_url=llm_base_url,
+             temperature=llm_temperature,
+             max_tokens=llm_max_tokens,
+             timeout=llm_timeout,
+         )
+         self.response_validator = AlignmentResponseValidator()
+         self.threat_vuln_classifier = ThreatVulnerabilityClassifier(
+             model=llm_model,
+             api_key=llm_api_key,
+             base_url=llm_base_url,
+         )
+
+         # Track analysis statistics
+         self.stats = {
+             "total_analyzed": 0,
+             "mismatches_detected": 0,
+             "no_mismatch": 0,
+             "skipped_invalid_response": 0,
+             "skipped_error": 0,
+         }
+
+         self.logger.debug("AlignmentOrchestrator initialized")
+
+     async def check_alignment(
+         self,
+         func_context: SkillFunctionContext,
+         skill_description: str | None = None,
+     ) -> tuple[dict[str, Any], SkillFunctionContext] | None:
+         """Check if function behavior aligns with skill description.
+
+         This is the main entry point for alignment verification. It coordinates
+         the full verification pipeline:
+         1. Build comprehensive prompt with evidence
+         2. Query LLM for alignment analysis
+         3. Validate response
+         4. Return analysis and context for Finding creation
+
+         Args:
+             func_context: Complete function context with dataflow analysis
+             skill_description: Overall skill description from SKILL.md
+
+         Returns:
+             Tuple of (analysis_dict, func_context) if mismatch detected, None if aligned
+         """
+         self.stats["total_analyzed"] += 1
+
+         try:
+             # Step 1: Build alignment verification prompt
+             self.logger.debug(f"Building alignment prompt for {func_context.name}")
+             try:
+                 prompt = self.prompt_builder.build_prompt(func_context, skill_description=skill_description)
+             except Exception as e:
+                 self.logger.error(f"Prompt building failed for {func_context.name}: {e}", exc_info=True)
+                 self.stats["skipped_error"] += 1
+                 raise
+
+             # Step 2: Query LLM for alignment verification
+             self.logger.debug(f"Querying LLM for alignment verification of {func_context.name}")
+             try:
+                 response = await self.llm_client.verify_alignment(prompt)
+             except Exception as e:
+                 self.logger.error(f"LLM verification failed for {func_context.name}: {e}", exc_info=True)
+                 self.stats["skipped_error"] += 1
+                 raise
+
+             # Step 3: Validate and parse response
+             self.logger.debug(f"Validating alignment response for {func_context.name}")
+             try:
+                 result = self.response_validator.validate(response)
+             except Exception as e:
+                 self.logger.error(f"Response validation failed for {func_context.name}: {e}", exc_info=True)
+                 self.stats["skipped_error"] += 1
+                 raise
+
+             if not result:
+                 self.logger.warning(f"Invalid response for {func_context.name}, skipping")
+                 self.stats["skipped_invalid_response"] += 1
+                 return None
+
+             # Step 4: Return analysis if mismatch detected
+             if result.get("mismatch_detected"):
+                 self.logger.debug(f"Alignment mismatch detected in {func_context.name}")
+                 self.stats["mismatches_detected"] += 1
+
+                 # Step 5: Classify as threat or vulnerability (second alignment layer)
+                 # Skip classification for INFO severity (documentation issues)
+                 threat_name = result.get("threat_name", "")
+                 if threat_name != "GENERAL DESCRIPTION-CODE MISMATCH":
+                     self.logger.debug(f"Classifying finding as threat or vulnerability for {func_context.name}")
+                     try:
+                         classification = await self.threat_vuln_classifier.classify_finding(
+                             threat_name=result.get("threat_name", "UNKNOWN"),
+                             severity=result.get("severity", "UNKNOWN"),
+                             summary=result.get("summary", ""),
+                             description_claims=result.get("description_claims", ""),
+                             actual_behavior=result.get("actual_behavior", ""),
+                             security_implications=result.get("security_implications", ""),
+                             dataflow_evidence=result.get("dataflow_evidence", ""),
+                         )
+                         if classification:
+                             # Add just the classification value to the result
+                             result["threat_vulnerability_classification"] = classification["classification"]
+                             self.logger.debug(
+                                 f"Classified as {classification['classification']} with {classification['confidence']} confidence"
+                             )
+                         else:
+                             self.logger.warning(f"Failed to classify finding for {func_context.name}")
+                             result["threat_vulnerability_classification"] = "UNCLEAR"
+                     except Exception as e:
+                         self.logger.error(f"Classification failed for {func_context.name}: {e}", exc_info=True)
+                         # Continue without classification - mark as UNCLEAR
+                         result["threat_vulnerability_classification"] = "UNCLEAR"
+
+                 return (result, func_context)
+             else:
+                 self.logger.debug(f"No alignment mismatch in {func_context.name}")
+                 self.stats["no_mismatch"] += 1
+                 return None
+
+         except Exception as e:
+             self.logger.error(f"Alignment check failed for {func_context.name}: {e}")
+             self.stats["skipped_error"] += 1
+             return None
+
+     def get_statistics(self) -> dict[str, int]:
+         """Get analysis statistics.
+
+         Returns:
+             Dictionary with analysis statistics including:
+             - total_analyzed: Total functions analyzed
+             - mismatches_detected: Functions with detected mismatches
+             - no_mismatch: Functions with no mismatch
+             - skipped_invalid_response: Functions skipped due to invalid LLM response
+             - skipped_error: Functions skipped due to errors
+         """
+         return self.stats.copy()
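A caller (per the package layout, the behavioral analyzer) drives this class one function at a time: obtain a SkillFunctionContext from the static-analysis context extractor, await check_alignment(), and receive an (analysis, context) tuple only when a mismatch was found. A sketch under those assumptions follows; constructing the SkillFunctionContext is outside this excerpt, and the same SKILL_SCANNER_LLM_API_KEY requirement applies via the embedded AlignmentLLMClient.

# Sketch of driving the orchestrator; func_context is assumed to come from
# skillanalyzer.core.static_analysis.context_extractor (not shown here).
import asyncio

from skillanalyzer.core.analyzers.behavioral.alignment import AlignmentOrchestrator


async def review(func_context, skill_description: str) -> None:
    orchestrator = AlignmentOrchestrator(llm_model="gemini/gemini-2.0-flash")
    outcome = await orchestrator.check_alignment(func_context, skill_description=skill_description)
    if outcome is None:
        print("aligned, or skipped (check get_statistics() for which)")
    else:
        analysis, ctx = outcome
        print(ctx.name, analysis.get("severity"), analysis.get("threat_vulnerability_classification"))
    print(orchestrator.get_statistics())


# asyncio.run(review(some_context, "Summarizes CSV files"))  # illustrative call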