cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,284 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ LLM Request Handler.
19
+
20
+ Handles LLM API requests with retry logic and exponential backoff.
21
+ Supports both LiteLLM and Google Generative AI SDK.
22
+ Uses structured outputs (JSON schema) when available.
23
+ """
24
+
25
+ import asyncio
26
+ import json
27
+ import warnings
28
+ from pathlib import Path
29
+ from typing import Any
30
+
31
+ from .llm_provider_config import ProviderConfig
32
+
33
# Optional backend: LiteLLM. When it is not installed the availability flag is
# False and ``acompletion`` is bound to None.
# (ModuleNotFoundError is a subclass of ImportError, so one clause covers both.)
try:
    from litellm import acompletion
except ImportError:
    acompletion = None
    LITELLM_AVAILABLE = False
else:
    LITELLM_AVAILABLE = True

# Optional backend: Google GenAI SDK, handled the same way as LiteLLM above.
try:
    from google import genai
except ImportError:
    genai = None
    GOOGLE_GENAI_AVAILABLE = False
else:
    GOOGLE_GENAI_AVAILABLE = True

# Suppress LiteLLM pydantic serialization warnings (cosmetic, doesn't affect functionality)
for _ignored_message in (
    ".*Pydantic serializer warnings.*",
    ".*Expected `Message`.*",
    ".*Expected `StreamingChoices`.*",
    ".*close_litellm_async_clients.*",
):
    warnings.filterwarnings("ignore", message=_ignored_message)
del _ignored_message
54
+
55
+
56
+ class LLMRequestHandler:
57
+ """Handles LLM API requests with retry logic and structured outputs."""
58
+
59
+ def __init__(
60
+ self,
61
+ provider_config: ProviderConfig,
62
+ max_tokens: int = 4000,
63
+ temperature: float = 0.0,
64
+ max_retries: int = 3,
65
+ rate_limit_delay: float = 2.0,
66
+ timeout: int = 120,
67
+ ):
68
+ """
69
+ Initialize request handler.
70
+
71
+ Args:
72
+ provider_config: Provider configuration
73
+ max_tokens: Maximum tokens for response
74
+ temperature: Sampling temperature
75
+ max_retries: Max retry attempts on rate limits
76
+ rate_limit_delay: Base delay for exponential backoff
77
+ timeout: Request timeout in seconds
78
+ """
79
+ self.provider_config = provider_config
80
+ self.max_tokens = max_tokens
81
+ self.temperature = temperature
82
+ self.max_retries = max_retries
83
+ self.rate_limit_delay = rate_limit_delay
84
+ self.timeout = timeout
85
+
86
+ # Load JSON schema for structured outputs
87
+ self.response_schema = self._load_response_schema()
88
+
89
+ def _load_response_schema(self) -> dict[str, Any] | None:
90
+ """Load JSON schema for structured outputs."""
91
+ try:
92
+ schema_path = Path(__file__).parent.parent.parent / "data" / "prompts" / "llm_response_schema.json"
93
+ if schema_path.exists():
94
+ return json.loads(schema_path.read_text(encoding="utf-8"))
95
+ except Exception as e:
96
+ print(f"Warning: Could not load response schema: {e}")
97
+ return None
98
+
99
+ def _sanitize_schema_for_google(self, schema: dict[str, Any]) -> dict[str, Any]:
100
+ """
101
+ Remove additionalProperties from schema for Google SDK compatibility.
102
+
103
+ Google SDK doesn't support additionalProperties in structured output schemas.
104
+ This recursively removes it from the schema.
105
+ """
106
+ if not isinstance(schema, dict):
107
+ return schema
108
+
109
+ sanitized = {}
110
+ for key, value in schema.items():
111
+ if key == "additionalProperties":
112
+ # Skip additionalProperties - Google SDK doesn't support it
113
+ continue
114
+ elif isinstance(value, dict):
115
+ sanitized[key] = self._sanitize_schema_for_google(value)
116
+ elif isinstance(value, list):
117
+ sanitized[key] = [
118
+ self._sanitize_schema_for_google(item) if isinstance(item, dict) else item for item in value
119
+ ]
120
+ else:
121
+ sanitized[key] = value
122
+
123
+ return sanitized
124
+
125
+ async def make_request(self, messages: list[dict[str, str]], context: str = "") -> str:
126
+ """
127
+ Make LLM request with retry logic and exponential backoff.
128
+
129
+ Args:
130
+ messages: Messages to send (should include system and user messages)
131
+ context: Context for logging
132
+
133
+ Returns:
134
+ Response text content
135
+
136
+ Raises:
137
+ Exception: If all retries exhausted
138
+ """
139
+ if self.provider_config.use_google_sdk:
140
+ # For Google SDK, combine system and user messages into a single prompt
141
+ # Google SDK doesn't have separate system/user roles like OpenAI/Anthropic
142
+ prompt_parts = []
143
+ for msg in messages:
144
+ role = msg.get("role", "user")
145
+ content = msg.get("content", "")
146
+ if role == "system":
147
+ prompt_parts.append(f"System Instructions:\n{content}\n")
148
+ elif role == "user":
149
+ prompt_parts.append(f"User Request:\n{content}\n")
150
+
151
+ combined_prompt = "\n".join(prompt_parts).strip()
152
+ return await self._make_google_sdk_request(combined_prompt)
153
+ else:
154
+ return await self._make_litellm_request(messages, context)
155
+
156
+ async def _make_litellm_request(self, messages: list[dict[str, str]], context: str) -> str:
157
+ """Make request using LiteLLM with structured outputs when supported."""
158
+ last_exception = None
159
+
160
+ for attempt in range(self.max_retries + 1):
161
+ try:
162
+ request_params = {
163
+ "model": self.provider_config.model,
164
+ "messages": messages,
165
+ "max_tokens": self.max_tokens,
166
+ "temperature": self.temperature,
167
+ "timeout": self.timeout,
168
+ **self.provider_config.get_request_params(),
169
+ }
170
+
171
+ # Add structured output support using LiteLLM's unified format
172
+ # According to LiteLLM docs: https://docs.litellm.ai/docs/completion/json_mode
173
+ # Format: response_format={ "type": "json_schema", "json_schema": { "name": "...", "schema": {...}, "strict": true } }
174
+ # Works for: OpenAI, Anthropic Claude, Gemini (via LiteLLM), Bedrock, Vertex AI, Groq, Ollama, Databricks
175
+ if self.response_schema:
176
+ request_params["response_format"] = {
177
+ "type": "json_schema",
178
+ "json_schema": {
179
+ "name": "security_analysis_response",
180
+ "schema": self.response_schema,
181
+ "strict": True, # Enforce strict schema compliance - prevents extra fields
182
+ },
183
+ }
184
+
185
+ response = await acompletion(**request_params)
186
+ return response.choices[0].message.content
187
+
188
+ except Exception as e:
189
+ last_exception = e
190
+ error_msg = str(e).lower()
191
+
192
+ # Check for rate limiting
193
+ if any(
194
+ keyword in error_msg
195
+ for keyword in ["rate limit", "quota", "too many requests", "429", "throttling"]
196
+ ):
197
+ if attempt < self.max_retries:
198
+ delay = (2**attempt) * self.rate_limit_delay
199
+ print(
200
+ f"Rate limit hit for {context}, retrying in {delay}s (attempt {attempt + 1}/{self.max_retries + 1})"
201
+ )
202
+ await asyncio.sleep(delay)
203
+ continue
204
+
205
+ # For other errors, don't retry
206
+ print(f"LLM API error for {context}: {e}")
207
+ break
208
+
209
+ raise last_exception
210
+
211
+ async def _make_google_sdk_request(self, prompt: str) -> str:
212
+ """Make request using Google GenAI SDK (new SDK) with structured outputs."""
213
+ last_exception = None
214
+
215
+ for attempt in range(self.max_retries + 1):
216
+ try:
217
+ # Create client with API key (new SDK uses Client pattern)
218
+ client = genai.Client(api_key=self.provider_config.api_key)
219
+
220
+ # Build generation config with structured output
221
+ # New SDK uses GenerateContentConfig type
222
+ config_dict = {
223
+ "max_output_tokens": self.max_tokens,
224
+ "temperature": self.temperature,
225
+ }
226
+
227
+ # Add structured output support using Google Gemini SDK format
228
+ # According to Gemini docs: https://ai.google.dev/gemini-api/docs/structured-output
229
+ # Format: response_mime_type="application/json" and response_schema={...}
230
+ # Note: Google SDK doesn't support additionalProperties in schema
231
+ if self.response_schema:
232
+ config_dict["response_mime_type"] = "application/json"
233
+ # Remove additionalProperties for Google SDK compatibility
234
+ sanitized_schema = self._sanitize_schema_for_google(self.response_schema)
235
+ config_dict["response_schema"] = sanitized_schema
236
+
237
+ # Generate content using new SDK API
238
+ # New SDK uses client.models.generate_content(model, contents, config)
239
+ loop = asyncio.get_event_loop()
240
+
241
+ def generate():
242
+ # New SDK API: client.models.generate_content(model=..., contents=..., config=...)
243
+ response = client.models.generate_content(
244
+ model=self.provider_config.model,
245
+ contents=prompt,
246
+ config=config_dict,
247
+ )
248
+ return response
249
+
250
+ response = await loop.run_in_executor(None, generate)
251
+
252
+ # Extract text from response (new SDK format)
253
+ # Response has .text attribute directly
254
+ if hasattr(response, "text") and response.text:
255
+ return response.text
256
+ elif hasattr(response, "candidates") and response.candidates:
257
+ # Fallback: check candidates array
258
+ candidate = response.candidates[0]
259
+ if hasattr(candidate, "content") and candidate.content:
260
+ parts = candidate.content.parts if hasattr(candidate.content, "parts") else []
261
+ if parts and hasattr(parts[0], "text"):
262
+ return parts[0].text
263
+ elif hasattr(response, "content"):
264
+ # Another fallback
265
+ return str(response.content)
266
+ else:
267
+ return str(response)
268
+
269
+ except Exception as e:
270
+ last_exception = e
271
+ error_msg = str(e).lower()
272
+
273
+ # Check if retryable
274
+ if "quota" in error_msg or "rate limit" in error_msg or "429" in error_msg:
275
+ if attempt < self.max_retries:
276
+ wait_time = self.rate_limit_delay * (2**attempt)
277
+ await asyncio.sleep(wait_time)
278
+ continue
279
+
280
+ # Non-retryable error - print for debugging
281
+ print(f"LLM analysis failed: {e}")
282
+ raise
283
+
284
+ raise last_exception
@@ -0,0 +1,81 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ LLM Response Parser.
19
+
20
+ Handles parsing of LLM responses, extracting JSON from various formats.
21
+ """
22
+
23
+ import json
24
+ from typing import Any
25
+
26
+
27
class ResponseParser:
    """Parses LLM responses and extracts JSON."""

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """
        Return the contents of the first markdown code fence in ``text``.

        A fence tagged as json is preferred over an untagged one. A fence that
        is never closed (e.g. truncated output) runs to the end of the text.
        Text without any fence is returned unchanged.
        """
        for marker in ("```json", "```"):
            start = text.find(marker)
            if start == -1:
                continue
            start += len(marker)
            end = text.find("```", start)
            if end == -1:
                # Unterminated fence: keep everything after the opening marker
                # instead of slicing with -1 and dropping the last character.
                end = len(text)
            return text[start:end].strip()
        return text

    @staticmethod
    def parse(response_content: str) -> dict[str, Any]:
        """
        Parse LLM response JSON.

        Handles multiple formats:
        - Direct JSON (returned as parsed, whatever its JSON type)
        - JSON in markdown code blocks
        - JSON with surrounding text

        Args:
            response_content: Raw response content

        Returns:
            Parsed JSON dictionary

        Raises:
            ValueError: If the response is empty or no JSON object can be parsed
        """
        if not response_content or not response_content.strip():
            raise ValueError("Empty response from LLM")

        # Try direct JSON parse
        try:
            return json.loads(response_content.strip())
        except json.JSONDecodeError:
            pass

        # Try to extract JSON from markdown code blocks
        text = ResponseParser._strip_code_fence(response_content)

        # Decode the object starting at the first "{". raw_decode stops at the
        # end of that object and understands string literals, so braces inside
        # JSON strings and trailing prose do not break extraction. Only the
        # first "{" is tried: falling back to later ones could silently return
        # a nested fragment of a truncated response.
        start_idx = text.find("{")
        if start_idx != -1:
            try:
                parsed, _ = json.JSONDecoder().raw_decode(text, start_idx)
            except json.JSONDecodeError as err:
                raise ValueError(f"Could not parse JSON from response: {text[:200]}") from err
            return parsed

        raise ValueError(f"Could not parse JSON from response: {text[:200]}")