cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,422 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """Alignment Prompt Builder for Semantic Verification.
18
+
19
+ This module constructs comprehensive prompts for LLM-based semantic alignment
20
+ verification between skill descriptions and their actual implementation behavior.
21
+
22
+ The prompt builder creates evidence-rich prompts that present:
23
+ - Skill description/claims (what the skill says it does)
24
+ - Actual behavior evidence (what static analysis shows it does)
25
+ - Supporting dataflow and call graph evidence
26
+ """
27
+
28
+ import json
29
+ import logging
30
+ import secrets
31
+ from pathlib import Path
32
+
33
+ from .....core.static_analysis.context_extractor import SkillFunctionContext
34
+
35
+
36
+ class AlignmentPromptBuilder:
37
+ """Builds comprehensive prompts for semantic alignment verification.
38
+
39
+ Constructs detailed prompts that provide LLMs with:
40
+ - Function metadata and signatures
41
+ - Parameter flow tracking evidence
42
+ - Function call sequences
43
+ - Cross-file call chains
44
+ - Security indicators (file ops, network ops, etc.)
45
+ - Control flow and data dependencies
46
+
47
+ Uses randomized delimiters to prevent prompt injection attacks.
48
+ """
49
+
50
+ # Configurable limits for prompt size
51
+ MAX_OPERATIONS_PER_PARAM = 10
52
+ MAX_FUNCTION_CALLS = 20
53
+ MAX_ASSIGNMENTS = 15
54
+ MAX_CROSS_FILE_CALLS = 10
55
+ MAX_REACHABLE_FILES = 5
56
+ MAX_CONSTANTS = 10
57
+ MAX_STRING_LITERALS = 15
58
+ MAX_REACHES_CALLS = 10
59
+
60
+ def __init__(
61
+ self,
62
+ max_operations: int | None = None,
63
+ max_calls: int | None = None,
64
+ max_assignments: int | None = None,
65
+ max_cross_file_calls: int | None = None,
66
+ max_reachable_files: int | None = None,
67
+ max_constants: int | None = None,
68
+ max_string_literals: int | None = None,
69
+ max_reaches_calls: int | None = None,
70
+ ):
71
+ """Initialize the alignment prompt builder.
72
+
73
+ Args:
74
+ max_operations: Maximum operations to show per parameter
75
+ max_calls: Maximum function calls to show
76
+ max_assignments: Maximum assignments to show
77
+ max_cross_file_calls: Maximum cross-file calls to show
78
+ max_reachable_files: Maximum reachable files to show
79
+ max_constants: Maximum constants to show
80
+ max_string_literals: Maximum string literals to show
81
+ max_reaches_calls: Maximum reaches calls to show
82
+ """
83
+ self.logger = logging.getLogger(__name__)
84
+ self._template = self._load_template()
85
+
86
+ # Use provided values or defaults
87
+ self.MAX_OPERATIONS_PER_PARAM = max_operations or self.MAX_OPERATIONS_PER_PARAM
88
+ self.MAX_FUNCTION_CALLS = max_calls or self.MAX_FUNCTION_CALLS
89
+ self.MAX_ASSIGNMENTS = max_assignments or self.MAX_ASSIGNMENTS
90
+ self.MAX_CROSS_FILE_CALLS = max_cross_file_calls or self.MAX_CROSS_FILE_CALLS
91
+ self.MAX_REACHABLE_FILES = max_reachable_files or self.MAX_REACHABLE_FILES
92
+ self.MAX_CONSTANTS = max_constants or self.MAX_CONSTANTS
93
+ self.MAX_STRING_LITERALS = max_string_literals or self.MAX_STRING_LITERALS
94
+ self.MAX_REACHES_CALLS = max_reaches_calls or self.MAX_REACHES_CALLS
95
+
96
+ def build_prompt(self, func_context: SkillFunctionContext, skill_description: str | None = None) -> str:
97
+ """Build comprehensive alignment verification prompt.
98
+
99
+ Args:
100
+ func_context: Complete function context with dataflow analysis
101
+ skill_description: Overall skill description from SKILL.md
102
+
103
+ Returns:
104
+ Formatted prompt string with evidence
105
+ """
106
+ # Generate random delimiter tags to prevent prompt injection
107
+ random_id = secrets.token_hex(16)
108
+ start_tag = f"<!---UNTRUSTED_INPUT_START_{random_id}--->"
109
+ end_tag = f"<!---UNTRUSTED_INPUT_END_{random_id}--->"
110
+
111
+ docstring = func_context.docstring or "No docstring provided"
112
+
113
+ # Build the analysis content using list accumulation for efficiency
114
+ content_parts = []
115
+
116
+ # Skill description (if provided)
117
+ if skill_description:
118
+ content_parts.append(f"""**SKILL DESCRIPTION (from SKILL.md):**
119
+ {skill_description}
120
+
121
+ """)
122
+
123
+ # Function information
124
+ content_parts.append(f"""**FUNCTION INFORMATION:**
125
+ - Function Name: {func_context.name}
126
+ - Line: {func_context.line_number}
127
+ - Docstring/Description: {docstring}
128
+
129
+ **FUNCTION SIGNATURE:**
130
+ - Parameters: {json.dumps(func_context.parameters, indent=2)}
131
+ - Return Type: {func_context.return_type or "Not specified"}
132
+ """)
133
+
134
+ # Add imports section
135
+ if func_context.imports:
136
+ import_parts = ["\n**IMPORTS:**\n"]
137
+ import_parts.append("The following libraries and modules are imported:\n")
138
+ for imp in func_context.imports:
139
+ import_parts.append(f" {imp}\n")
140
+ import_parts.append("\n")
141
+ content_parts.append("".join(import_parts))
142
+
143
+ content_parts.append("""
144
+ **DATAFLOW ANALYSIS:**
145
+ All parameters are treated as untrusted input (skill entry points receive external data).
146
+
147
+ Parameter Flow Tracking:
148
+ """)
149
+
150
+ # Add parameter flow tracking
151
+ if func_context.parameter_flows:
152
+ param_parts = ["\n**PARAMETER FLOW TRACKING:**\n"]
153
+ for flow in func_context.parameter_flows:
154
+ param_name = flow.get("parameter", "unknown")
155
+ param_parts.append(f"\nParameter '{param_name}' flows through:\n")
156
+
157
+ if flow.get("operations"):
158
+ param_parts.append(f" Operations ({len(flow['operations'])} total):\n")
159
+ for op in flow["operations"][: self.MAX_OPERATIONS_PER_PARAM]:
160
+ op_type = op.get("type", "unknown")
161
+ line = op.get("line", 0)
162
+ if op_type == "assignment":
163
+ param_parts.append(f" Line {line}: {op.get('target')} = {op.get('value')}\n")
164
+ elif op_type == "function_call":
165
+ param_parts.append(f" Line {line}: {op.get('function')}({op.get('argument')})\n")
166
+ elif op_type == "return":
167
+ param_parts.append(f" Line {line}: return {op.get('value')}\n")
168
+
169
+ if flow.get("reaches_calls"):
170
+ param_parts.append(
171
+ f" Reaches function calls: {', '.join(flow['reaches_calls'][: self.MAX_REACHES_CALLS])}\n"
172
+ )
173
+
174
+ if flow.get("reaches_external"):
175
+ param_parts.append(" [WARNING] REACHES EXTERNAL OPERATIONS (file/network/subprocess)\n")
176
+
177
+ if flow.get("reaches_returns"):
178
+ param_parts.append(" Returns to caller\n")
179
+
180
+ content_parts.append("".join(param_parts))
181
+
182
+ # Add variable dependencies
183
+ if func_context.variable_dependencies:
184
+ var_parts = ["\n**VARIABLE DEPENDENCIES:**\n"]
185
+ for var, deps in func_context.variable_dependencies.items():
186
+ var_parts.append(f" {var} depends on: {', '.join(deps)}\n")
187
+ content_parts.append("".join(var_parts))
188
+
189
+ # Add function calls
190
+ if func_context.function_calls:
191
+ call_parts = [f"\n**FUNCTION CALLS ({len(func_context.function_calls)} total):**\n"]
192
+ for call in func_context.function_calls[: self.MAX_FUNCTION_CALLS]:
193
+ try:
194
+ call_name = call.get("name", "unknown")
195
+ call_args = call.get("args", [])
196
+ call_line = call.get("line", 0)
197
+ call_parts.append(f" Line {call_line}: {call_name}({', '.join(str(a) for a in call_args)})\n")
198
+ except Exception:
199
+ continue
200
+ content_parts.append("".join(call_parts))
201
+
202
+ # Add assignments
203
+ if func_context.assignments:
204
+ assign_parts = [f"\n**ASSIGNMENTS ({len(func_context.assignments)} total):**\n"]
205
+ for assign in func_context.assignments[: self.MAX_ASSIGNMENTS]:
206
+ try:
207
+ line = assign.get("line", 0)
208
+ var = assign.get("variable", "unknown")
209
+ val = assign.get("value", "unknown")
210
+ assign_parts.append(f" Line {line}: {var} = {val}\n")
211
+ except Exception:
212
+ continue
213
+ content_parts.append("".join(assign_parts))
214
+
215
+ # Add control flow information
216
+ if func_context.control_flow:
217
+ content_parts.append(f"\n**CONTROL FLOW:**\n{json.dumps(func_context.control_flow, indent=2)}\n")
218
+
219
+ # Add cross-file analysis
220
+ if func_context.cross_file_calls:
221
+ cross_file_parts = [
222
+ f"\n**CROSS-FILE CALL CHAINS ({len(func_context.cross_file_calls)} calls to other files):**\n"
223
+ ]
224
+ cross_file_parts.append(
225
+ "[WARNING] This function calls functions from other files. Full call chains shown:\n\n"
226
+ )
227
+ for call in func_context.cross_file_calls[: self.MAX_CROSS_FILE_CALLS]:
228
+ try:
229
+ if "to_function" in call:
230
+ cross_file_parts.append(
231
+ f" {call.get('from_function', 'unknown')} -> {call.get('to_function', 'unknown')}\n"
232
+ )
233
+ cross_file_parts.append(f" From: {call.get('from_file', 'unknown')}\n")
234
+ cross_file_parts.append(f" To: {call.get('to_file', 'unknown')}\n")
235
+ else:
236
+ func_name = call.get("function", "unknown")
237
+ file_name = call.get("file", "unknown")
238
+ cross_file_parts.append(f" {func_name}() in {file_name}\n")
239
+ cross_file_parts.append("\n")
240
+ except Exception:
241
+ continue
242
+ cross_file_parts.append(
243
+ "Note: Analyze the entire call chain to understand what operations are performed.\n"
244
+ )
245
+ content_parts.append("".join(cross_file_parts))
246
+
247
+ # Add reachability analysis
248
+ if func_context.reachable_functions:
249
+ total_reachable = len(func_context.reachable_functions)
250
+ functions_by_file = {}
251
+ for func in func_context.reachable_functions:
252
+ if "::" in func:
253
+ file_path, func_name = func.rsplit("::", 1)
254
+ if file_path not in functions_by_file:
255
+ functions_by_file[file_path] = []
256
+ functions_by_file[file_path].append(func_name)
257
+
258
+ if len(functions_by_file) > 1:
259
+ reach_parts = ["\n**REACHABILITY ANALYSIS:**\n"]
260
+ reach_parts.append(
261
+ f"Total reachable functions: {total_reachable} across {len(functions_by_file)} file(s)\n\n"
262
+ )
263
+ for file_path, funcs in list(functions_by_file.items())[: self.MAX_REACHABLE_FILES]:
264
+ file_name = file_path.split("/")[-1] if "/" in file_path else file_path
265
+ reach_parts.append(f" {file_name}: {', '.join(funcs[:10])}\n")
266
+ if len(funcs) > 10:
267
+ reach_parts.append(f" ... and {len(funcs) - 10} more\n")
268
+ content_parts.append("".join(reach_parts))
269
+
270
+ # Add constants
271
+ if func_context.constants:
272
+ const_parts = ["\n**CONSTANTS:**\n"]
273
+ for var, val in list(func_context.constants.items())[: self.MAX_CONSTANTS]:
274
+ const_parts.append(f" {var} = {val}\n")
275
+ content_parts.append("".join(const_parts))
276
+
277
+ # Add string literals
278
+ if func_context.string_literals:
279
+ lit_parts = [f"\n**STRING LITERALS ({len(func_context.string_literals)} total):**\n"]
280
+ for literal in func_context.string_literals[: self.MAX_STRING_LITERALS]:
281
+ safe_literal = literal.replace("\n", "\\n").replace("\r", "\\r")[:150]
282
+ lit_parts.append(f' "{safe_literal}"\n')
283
+ content_parts.append("".join(lit_parts))
284
+
285
+ # Add return expressions
286
+ if func_context.return_expressions:
287
+ ret_parts = ["\n**RETURN EXPRESSIONS:**\n"]
288
+ if func_context.return_type:
289
+ ret_parts.append(f"Declared return type: {func_context.return_type}\n")
290
+ for ret_expr in func_context.return_expressions:
291
+ ret_parts.append(f" return {ret_expr}\n")
292
+ content_parts.append("".join(ret_parts))
293
+
294
+ # Add exception handling
295
+ if func_context.exception_handlers:
296
+ exc_parts = ["\n**EXCEPTION HANDLING:**\n"]
297
+ for handler in func_context.exception_handlers:
298
+ exc_parts.append(f" Line {handler['line']}: except {handler['exception_type']}")
299
+ if handler.get("is_silent"):
300
+ exc_parts.append(" ([WARNING] SILENT - just 'pass')\n")
301
+ else:
302
+ exc_parts.append("\n")
303
+ content_parts.append("".join(exc_parts))
304
+
305
+ # Add environment variable access
306
+ if func_context.env_var_access:
307
+ env_parts = ["\n**ENVIRONMENT VARIABLE ACCESS:**\n"]
308
+ env_parts.append("[WARNING] This function accesses environment variables:\n")
309
+ for env_access in func_context.env_var_access:
310
+ env_parts.append(f" {env_access}\n")
311
+ content_parts.append("".join(env_parts))
312
+
313
+ # Add global variable writes
314
+ if func_context.global_writes:
315
+ global_parts = ["\n**GLOBAL VARIABLE WRITES:**\n"]
316
+ global_parts.append("[WARNING] This function modifies global state:\n")
317
+ for gwrite in func_context.global_writes:
318
+ global_parts.append(f" Line {gwrite['line']}: global {gwrite['variable']} = {gwrite['value']}\n")
319
+ content_parts.append("".join(global_parts))
320
+
321
+ # Add attribute access
322
+ if func_context.attribute_access:
323
+ writes = [op for op in func_context.attribute_access if op.get("type") == "write"]
324
+ if writes:
325
+ attr_parts = ["\n**ATTRIBUTE WRITES:**\n"]
326
+ for op in writes[:10]:
327
+ attr_parts.append(f" Line {op['line']}: {op['object']}.{op['attribute']} = {op['value']}\n")
328
+ content_parts.append("".join(attr_parts))
329
+
330
+ # Join all content parts efficiently
331
+ analysis_content = "".join(content_parts)
332
+
333
+ # Security validation: Check that the untrusted input doesn't contain our delimiter tags
334
+ if start_tag in analysis_content or end_tag in analysis_content:
335
+ self.logger.warning(
336
+ f"Potential prompt injection detected in function {func_context.name}: Input contains delimiter tags"
337
+ )
338
+
339
+ # Wrap the untrusted content with randomized delimiters
340
+ prompt = f"""{self._template}
341
+
342
+ {start_tag}
343
+ {analysis_content}
344
+ {end_tag}
345
+ """
346
+
347
+ return prompt.strip()
348
+
349
+ def _load_template(self) -> str:
350
+ """Load the alignment verification prompt template.
351
+
352
+ Returns:
353
+ Prompt template string
354
+ """
355
+ try:
356
+ # Try to load from data/prompts directory
357
+ prompt_file = (
358
+ Path(__file__).parent.parent.parent.parent.parent
359
+ / "data"
360
+ / "prompts"
361
+ / "skill_alignment_analysis_prompt.md"
362
+ )
363
+
364
+ if prompt_file.is_file():
365
+ return prompt_file.read_text(encoding="utf-8")
366
+
367
+ # Fallback to embedded prompt
368
+ self.logger.warning("Prompt file not found, using embedded prompt")
369
+ return self._get_embedded_prompt()
370
+
371
+ except Exception as e:
372
+ self.logger.warning(f"Failed to load prompt file: {e}, using embedded prompt")
373
+ return self._get_embedded_prompt()
374
+
375
+ def _get_embedded_prompt(self) -> str:
376
+ """Get embedded fallback prompt template.
377
+
378
+ Returns:
379
+ Embedded prompt template
380
+ """
381
+ return """# Claude Skill Alignment Analysis
382
+
383
+ You are a security expert analyzing Claude Skills for alignment mismatches between their
384
+ described behavior and actual implementation.
385
+
386
+ ## Your Task
387
+
388
+ Analyze the provided code evidence and determine if there are any mismatches between:
389
+ 1. What the skill description/docstring claims the code does
390
+ 2. What the code actually does based on dataflow analysis
391
+
392
+ ## Analysis Guidelines
393
+
394
+ Focus on detecting:
395
+ - **Data Exfiltration**: Code that sends data to external servers not mentioned in description
396
+ - **Credential Theft**: Accessing credential files (.aws, .ssh) or environment variables
397
+ - **Hidden Network Calls**: Network operations not disclosed in skill description
398
+ - **Command Injection**: Executing arbitrary commands via user input
399
+ - **Unauthorized File Access**: Reading/writing files beyond stated scope
400
+
401
+ ## Response Format
402
+
403
+ Respond with valid JSON:
404
+
405
+ ```json
406
+ {
407
+ "mismatch_detected": true/false,
408
+ "threat_name": "THREAT_CATEGORY or empty",
409
+ "severity": "CRITICAL/HIGH/MEDIUM/LOW/INFO",
410
+ "confidence": "HIGH/MEDIUM/LOW",
411
+ "summary": "Brief description of the mismatch",
412
+ "description_claims": "What the skill claims to do",
413
+ "actual_behavior": "What the code actually does",
414
+ "security_implications": "Security impact",
415
+ "dataflow_evidence": "Key evidence from dataflow analysis"
416
+ }
417
+ ```
418
+
419
+ If no mismatch is detected, set mismatch_detected to false.
420
+
421
+ ## Evidence to Analyze
422
+ """
@@ -0,0 +1,136 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """Alignment Response Validator for Semantic Verification.
18
+
19
+ This module validates and parses LLM responses from semantic alignment verification queries.
20
+
21
+ The validator:
22
+ - Parses JSON responses from LLM
23
+ - Validates response schema and required fields
24
+ - Handles parsing errors gracefully
25
+ """
26
+
27
+ import json
28
+ import logging
29
+ from typing import Any
30
+
31
+
32
+ class AlignmentResponseValidator:
33
+ """Validates alignment verification responses from LLM.
34
+
35
+ Ensures LLM responses are properly formatted JSON with required
36
+ alignment check fields.
37
+ """
38
+
39
+ def __init__(self):
40
+ """Initialize the alignment response validator."""
41
+ self.logger = logging.getLogger(__name__)
42
+
43
+ def validate(self, response: str) -> dict[str, Any] | None:
44
+ """Parse and validate alignment check response.
45
+
46
+ Args:
47
+ response: JSON response from LLM
48
+
49
+ Returns:
50
+ Parsed alignment check result or None if invalid
51
+ """
52
+ if not response or not response.strip():
53
+ self.logger.warning("Empty response from LLM")
54
+ return None
55
+
56
+ try:
57
+ # Try to parse JSON
58
+ data = json.loads(response)
59
+
60
+ # Validate it's a dictionary
61
+ if not isinstance(data, dict):
62
+ self.logger.warning(f"Response is not a JSON object: {type(data)}")
63
+ return None
64
+
65
+ # Check for required fields
66
+ if not self._has_required_fields(data):
67
+ self.logger.warning(f"Response missing required fields. Got: {list(data.keys())}")
68
+ return None
69
+
70
+ self.logger.debug(f"LLM response validated: mismatch_detected={data.get('mismatch_detected')}")
71
+ return data
72
+
73
+ except json.JSONDecodeError as e:
74
+ self.logger.warning(f"Invalid JSON response: {e}")
75
+ self.logger.debug(f"Raw response (first 500 chars): {response[:500]}")
76
+ # Try to extract JSON from markdown code blocks
77
+ return self._extract_json_from_markdown(response)
78
+ except Exception as e:
79
+ self.logger.error(f"Unexpected error validating response: {e}")
80
+ return None
81
+
82
+ def _has_required_fields(self, data: dict[str, Any]) -> bool:
83
+ """Check if response has all required alignment check fields.
84
+
85
+ Args:
86
+ data: Parsed JSON response
87
+
88
+ Returns:
89
+ True if all required fields present
90
+ """
91
+ required_fields = ["mismatch_detected"]
92
+
93
+ # Check required fields
94
+ if not all(field in data for field in required_fields):
95
+ return False
96
+
97
+ # If mismatch detected, check for additional required fields
98
+ if data.get("mismatch_detected"):
99
+ mismatch_required = ["confidence", "summary"]
100
+ if not all(field in data for field in mismatch_required):
101
+ return False
102
+
103
+ return True
104
+
105
+ def _extract_json_from_markdown(self, response: str) -> dict[str, Any] | None:
106
+ """Try to extract JSON from markdown code blocks.
107
+
108
+ Sometimes LLMs wrap JSON in ```json ... ``` blocks.
109
+
110
+ Args:
111
+ response: Response that may contain markdown
112
+
113
+ Returns:
114
+ Parsed JSON or None
115
+ """
116
+ try:
117
+ # Look for ```json ... ``` or ``` ... ```
118
+ if "```json" in response:
119
+ start = response.find("```json") + 7
120
+ end = response.find("```", start)
121
+ json_str = response[start:end].strip()
122
+ elif "```" in response:
123
+ start = response.find("```") + 3
124
+ end = response.find("```", start)
125
+ json_str = response[start:end].strip()
126
+ else:
127
+ return None
128
+
129
+ data = json.loads(json_str)
130
+ if isinstance(data, dict) and self._has_required_fields(data):
131
+ return data
132
+
133
+ except Exception:
134
+ pass
135
+
136
+ return None