cisco_ai_skill_scanner-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
- skillanalyzer/__init__.py +45 -0
- skillanalyzer/_version.py +34 -0
- skillanalyzer/api/__init__.py +25 -0
- skillanalyzer/api/api.py +34 -0
- skillanalyzer/api/api_cli.py +78 -0
- skillanalyzer/api/api_server.py +634 -0
- skillanalyzer/api/router.py +527 -0
- skillanalyzer/cli/__init__.py +25 -0
- skillanalyzer/cli/cli.py +816 -0
- skillanalyzer/config/__init__.py +26 -0
- skillanalyzer/config/config.py +149 -0
- skillanalyzer/config/config_parser.py +122 -0
- skillanalyzer/config/constants.py +85 -0
- skillanalyzer/core/__init__.py +24 -0
- skillanalyzer/core/analyzers/__init__.py +75 -0
- skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
- skillanalyzer/core/analyzers/base.py +53 -0
- skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
- skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
- skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
- skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
- skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
- skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
- skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
- skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
- skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
- skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
- skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
- skillanalyzer/core/analyzers/static.py +1105 -0
- skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
- skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
- skillanalyzer/core/exceptions.py +77 -0
- skillanalyzer/core/loader.py +377 -0
- skillanalyzer/core/models.py +300 -0
- skillanalyzer/core/reporters/__init__.py +26 -0
- skillanalyzer/core/reporters/json_reporter.py +65 -0
- skillanalyzer/core/reporters/markdown_reporter.py +209 -0
- skillanalyzer/core/reporters/sarif_reporter.py +246 -0
- skillanalyzer/core/reporters/table_reporter.py +195 -0
- skillanalyzer/core/rules/__init__.py +19 -0
- skillanalyzer/core/rules/patterns.py +165 -0
- skillanalyzer/core/rules/yara_scanner.py +157 -0
- skillanalyzer/core/scanner.py +437 -0
- skillanalyzer/core/static_analysis/__init__.py +27 -0
- skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
- skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
- skillanalyzer/core/static_analysis/context_extractor.py +742 -0
- skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
- skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
- skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
- skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
- skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
- skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
- skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
- skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
- skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
- skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
- skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
- skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
- skillanalyzer/core/static_analysis/types/__init__.py +36 -0
- skillanalyzer/data/__init__.py +30 -0
- skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
- skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
- skillanalyzer/data/prompts/llm_response_schema.json +71 -0
- skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
- skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
- skillanalyzer/data/prompts/unified_response_schema.md +97 -0
- skillanalyzer/data/rules/signatures.yaml +440 -0
- skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
- skillanalyzer/data/yara_rules/code_execution.yara +61 -0
- skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
- skillanalyzer/data/yara_rules/command_injection.yara +54 -0
- skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
- skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
- skillanalyzer/data/yara_rules/script_injection.yara +83 -0
- skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
- skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
- skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
- skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
- skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
- skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
- skillanalyzer/hooks/__init__.py +21 -0
- skillanalyzer/hooks/pre_commit.py +450 -0
- skillanalyzer/threats/__init__.py +25 -0
- skillanalyzer/threats/threats.py +480 -0
- skillanalyzer/utils/__init__.py +28 -0
- skillanalyzer/utils/command_utils.py +129 -0
- skillanalyzer/utils/di_container.py +154 -0
- skillanalyzer/utils/file_utils.py +86 -0
- skillanalyzer/utils/logging_config.py +96 -0
- skillanalyzer/utils/logging_utils.py +71 -0
skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py

@@ -0,0 +1,422 @@
# Copyright 2026 Cisco Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0

"""Alignment Prompt Builder for Semantic Verification.

This module constructs comprehensive prompts for LLM-based semantic alignment
verification between skill descriptions and their actual implementation behavior.

The prompt builder creates evidence-rich prompts that present:
- Skill description/claims (what the skill says it does)
- Actual behavior evidence (what static analysis shows it does)
- Supporting dataflow and call graph evidence
"""

import json
import logging
import secrets
from pathlib import Path

from .....core.static_analysis.context_extractor import SkillFunctionContext


class AlignmentPromptBuilder:
    """Builds comprehensive prompts for semantic alignment verification.

    Constructs detailed prompts that provide LLMs with:
    - Function metadata and signatures
    - Parameter flow tracking evidence
    - Function call sequences
    - Cross-file call chains
    - Security indicators (file ops, network ops, etc.)
    - Control flow and data dependencies

    Uses randomized delimiters to prevent prompt injection attacks.
    """

    # Configurable limits for prompt size
    MAX_OPERATIONS_PER_PARAM = 10
    MAX_FUNCTION_CALLS = 20
    MAX_ASSIGNMENTS = 15
    MAX_CROSS_FILE_CALLS = 10
    MAX_REACHABLE_FILES = 5
    MAX_CONSTANTS = 10
    MAX_STRING_LITERALS = 15
    MAX_REACHES_CALLS = 10

    def __init__(
        self,
        max_operations: int | None = None,
        max_calls: int | None = None,
        max_assignments: int | None = None,
        max_cross_file_calls: int | None = None,
        max_reachable_files: int | None = None,
        max_constants: int | None = None,
        max_string_literals: int | None = None,
        max_reaches_calls: int | None = None,
    ):
        """Initialize the alignment prompt builder.

        Args:
            max_operations: Maximum operations to show per parameter
            max_calls: Maximum function calls to show
            max_assignments: Maximum assignments to show
            max_cross_file_calls: Maximum cross-file calls to show
            max_reachable_files: Maximum reachable files to show
            max_constants: Maximum constants to show
            max_string_literals: Maximum string literals to show
            max_reaches_calls: Maximum reaches calls to show
        """
        self.logger = logging.getLogger(__name__)
        self._template = self._load_template()

        # Use provided values or defaults
        self.MAX_OPERATIONS_PER_PARAM = max_operations or self.MAX_OPERATIONS_PER_PARAM
        self.MAX_FUNCTION_CALLS = max_calls or self.MAX_FUNCTION_CALLS
        self.MAX_ASSIGNMENTS = max_assignments or self.MAX_ASSIGNMENTS
        self.MAX_CROSS_FILE_CALLS = max_cross_file_calls or self.MAX_CROSS_FILE_CALLS
        self.MAX_REACHABLE_FILES = max_reachable_files or self.MAX_REACHABLE_FILES
        self.MAX_CONSTANTS = max_constants or self.MAX_CONSTANTS
        self.MAX_STRING_LITERALS = max_string_literals or self.MAX_STRING_LITERALS
        self.MAX_REACHES_CALLS = max_reaches_calls or self.MAX_REACHES_CALLS

    def build_prompt(self, func_context: SkillFunctionContext, skill_description: str | None = None) -> str:
        """Build comprehensive alignment verification prompt.

        Args:
            func_context: Complete function context with dataflow analysis
            skill_description: Overall skill description from SKILL.md

        Returns:
            Formatted prompt string with evidence
        """
        # Generate random delimiter tags to prevent prompt injection
        random_id = secrets.token_hex(16)
        start_tag = f"<!---UNTRUSTED_INPUT_START_{random_id}--->"
        end_tag = f"<!---UNTRUSTED_INPUT_END_{random_id}--->"

        docstring = func_context.docstring or "No docstring provided"

        # Build the analysis content using list accumulation for efficiency
        content_parts = []

        # Skill description (if provided)
        if skill_description:
            content_parts.append(f"""**SKILL DESCRIPTION (from SKILL.md):**
{skill_description}

""")

        # Function information
        content_parts.append(f"""**FUNCTION INFORMATION:**
- Function Name: {func_context.name}
- Line: {func_context.line_number}
- Docstring/Description: {docstring}

**FUNCTION SIGNATURE:**
- Parameters: {json.dumps(func_context.parameters, indent=2)}
- Return Type: {func_context.return_type or "Not specified"}
""")

        # Add imports section
        if func_context.imports:
            import_parts = ["\n**IMPORTS:**\n"]
            import_parts.append("The following libraries and modules are imported:\n")
            for imp in func_context.imports:
                import_parts.append(f" {imp}\n")
            import_parts.append("\n")
            content_parts.append("".join(import_parts))

        content_parts.append("""
**DATAFLOW ANALYSIS:**
All parameters are treated as untrusted input (skill entry points receive external data).

Parameter Flow Tracking:
""")

        # Add parameter flow tracking
        if func_context.parameter_flows:
            param_parts = ["\n**PARAMETER FLOW TRACKING:**\n"]
            for flow in func_context.parameter_flows:
                param_name = flow.get("parameter", "unknown")
                param_parts.append(f"\nParameter '{param_name}' flows through:\n")

                if flow.get("operations"):
                    param_parts.append(f" Operations ({len(flow['operations'])} total):\n")
                    for op in flow["operations"][: self.MAX_OPERATIONS_PER_PARAM]:
                        op_type = op.get("type", "unknown")
                        line = op.get("line", 0)
                        if op_type == "assignment":
                            param_parts.append(f" Line {line}: {op.get('target')} = {op.get('value')}\n")
                        elif op_type == "function_call":
                            param_parts.append(f" Line {line}: {op.get('function')}({op.get('argument')})\n")
                        elif op_type == "return":
                            param_parts.append(f" Line {line}: return {op.get('value')}\n")

                if flow.get("reaches_calls"):
                    param_parts.append(
                        f" Reaches function calls: {', '.join(flow['reaches_calls'][: self.MAX_REACHES_CALLS])}\n"
                    )

                if flow.get("reaches_external"):
                    param_parts.append(" [WARNING] REACHES EXTERNAL OPERATIONS (file/network/subprocess)\n")

                if flow.get("reaches_returns"):
                    param_parts.append(" Returns to caller\n")

            content_parts.append("".join(param_parts))

        # Add variable dependencies
        if func_context.variable_dependencies:
            var_parts = ["\n**VARIABLE DEPENDENCIES:**\n"]
            for var, deps in func_context.variable_dependencies.items():
                var_parts.append(f" {var} depends on: {', '.join(deps)}\n")
            content_parts.append("".join(var_parts))

        # Add function calls
        if func_context.function_calls:
            call_parts = [f"\n**FUNCTION CALLS ({len(func_context.function_calls)} total):**\n"]
            for call in func_context.function_calls[: self.MAX_FUNCTION_CALLS]:
                try:
                    call_name = call.get("name", "unknown")
                    call_args = call.get("args", [])
                    call_line = call.get("line", 0)
                    call_parts.append(f" Line {call_line}: {call_name}({', '.join(str(a) for a in call_args)})\n")
                except Exception:
                    continue
            content_parts.append("".join(call_parts))

        # Add assignments
        if func_context.assignments:
            assign_parts = [f"\n**ASSIGNMENTS ({len(func_context.assignments)} total):**\n"]
            for assign in func_context.assignments[: self.MAX_ASSIGNMENTS]:
                try:
                    line = assign.get("line", 0)
                    var = assign.get("variable", "unknown")
                    val = assign.get("value", "unknown")
                    assign_parts.append(f" Line {line}: {var} = {val}\n")
                except Exception:
                    continue
            content_parts.append("".join(assign_parts))

        # Add control flow information
        if func_context.control_flow:
            content_parts.append(f"\n**CONTROL FLOW:**\n{json.dumps(func_context.control_flow, indent=2)}\n")

        # Add cross-file analysis
        if func_context.cross_file_calls:
            cross_file_parts = [
                f"\n**CROSS-FILE CALL CHAINS ({len(func_context.cross_file_calls)} calls to other files):**\n"
            ]
            cross_file_parts.append(
                "[WARNING] This function calls functions from other files. Full call chains shown:\n\n"
            )
            for call in func_context.cross_file_calls[: self.MAX_CROSS_FILE_CALLS]:
                try:
                    if "to_function" in call:
                        cross_file_parts.append(
                            f" {call.get('from_function', 'unknown')} -> {call.get('to_function', 'unknown')}\n"
                        )
                        cross_file_parts.append(f" From: {call.get('from_file', 'unknown')}\n")
                        cross_file_parts.append(f" To: {call.get('to_file', 'unknown')}\n")
                    else:
                        func_name = call.get("function", "unknown")
                        file_name = call.get("file", "unknown")
                        cross_file_parts.append(f" {func_name}() in {file_name}\n")
                    cross_file_parts.append("\n")
                except Exception:
                    continue
            cross_file_parts.append(
                "Note: Analyze the entire call chain to understand what operations are performed.\n"
            )
            content_parts.append("".join(cross_file_parts))

        # Add reachability analysis
        if func_context.reachable_functions:
            total_reachable = len(func_context.reachable_functions)
            functions_by_file = {}
            for func in func_context.reachable_functions:
                if "::" in func:
                    file_path, func_name = func.rsplit("::", 1)
                    if file_path not in functions_by_file:
                        functions_by_file[file_path] = []
                    functions_by_file[file_path].append(func_name)

            if len(functions_by_file) > 1:
                reach_parts = ["\n**REACHABILITY ANALYSIS:**\n"]
                reach_parts.append(
                    f"Total reachable functions: {total_reachable} across {len(functions_by_file)} file(s)\n\n"
                )
                for file_path, funcs in list(functions_by_file.items())[: self.MAX_REACHABLE_FILES]:
                    file_name = file_path.split("/")[-1] if "/" in file_path else file_path
                    reach_parts.append(f" {file_name}: {', '.join(funcs[:10])}\n")
                    if len(funcs) > 10:
                        reach_parts.append(f" ... and {len(funcs) - 10} more\n")
                content_parts.append("".join(reach_parts))

        # Add constants
        if func_context.constants:
            const_parts = ["\n**CONSTANTS:**\n"]
            for var, val in list(func_context.constants.items())[: self.MAX_CONSTANTS]:
                const_parts.append(f" {var} = {val}\n")
            content_parts.append("".join(const_parts))

        # Add string literals
        if func_context.string_literals:
            lit_parts = [f"\n**STRING LITERALS ({len(func_context.string_literals)} total):**\n"]
            for literal in func_context.string_literals[: self.MAX_STRING_LITERALS]:
                safe_literal = literal.replace("\n", "\\n").replace("\r", "\\r")[:150]
                lit_parts.append(f' "{safe_literal}"\n')
            content_parts.append("".join(lit_parts))

        # Add return expressions
        if func_context.return_expressions:
            ret_parts = ["\n**RETURN EXPRESSIONS:**\n"]
            if func_context.return_type:
                ret_parts.append(f"Declared return type: {func_context.return_type}\n")
            for ret_expr in func_context.return_expressions:
                ret_parts.append(f" return {ret_expr}\n")
            content_parts.append("".join(ret_parts))

        # Add exception handling
        if func_context.exception_handlers:
            exc_parts = ["\n**EXCEPTION HANDLING:**\n"]
            for handler in func_context.exception_handlers:
                exc_parts.append(f" Line {handler['line']}: except {handler['exception_type']}")
                if handler.get("is_silent"):
                    exc_parts.append(" ([WARNING] SILENT - just 'pass')\n")
                else:
                    exc_parts.append("\n")
            content_parts.append("".join(exc_parts))

        # Add environment variable access
        if func_context.env_var_access:
            env_parts = ["\n**ENVIRONMENT VARIABLE ACCESS:**\n"]
            env_parts.append("[WARNING] This function accesses environment variables:\n")
            for env_access in func_context.env_var_access:
                env_parts.append(f" {env_access}\n")
            content_parts.append("".join(env_parts))

        # Add global variable writes
        if func_context.global_writes:
            global_parts = ["\n**GLOBAL VARIABLE WRITES:**\n"]
            global_parts.append("[WARNING] This function modifies global state:\n")
            for gwrite in func_context.global_writes:
                global_parts.append(f" Line {gwrite['line']}: global {gwrite['variable']} = {gwrite['value']}\n")
            content_parts.append("".join(global_parts))

        # Add attribute access
        if func_context.attribute_access:
            writes = [op for op in func_context.attribute_access if op.get("type") == "write"]
            if writes:
                attr_parts = ["\n**ATTRIBUTE WRITES:**\n"]
                for op in writes[:10]:
                    attr_parts.append(f" Line {op['line']}: {op['object']}.{op['attribute']} = {op['value']}\n")
                content_parts.append("".join(attr_parts))

        # Join all content parts efficiently
        analysis_content = "".join(content_parts)

        # Security validation: Check that the untrusted input doesn't contain our delimiter tags
        if start_tag in analysis_content or end_tag in analysis_content:
            self.logger.warning(
                f"Potential prompt injection detected in function {func_context.name}: Input contains delimiter tags"
            )

        # Wrap the untrusted content with randomized delimiters
        prompt = f"""{self._template}

{start_tag}
{analysis_content}
{end_tag}
"""

        return prompt.strip()

    def _load_template(self) -> str:
        """Load the alignment verification prompt template.

        Returns:
            Prompt template string
        """
        try:
            # Try to load from data/prompts directory
            prompt_file = (
                Path(__file__).parent.parent.parent.parent.parent
                / "data"
                / "prompts"
                / "skill_alignment_analysis_prompt.md"
            )

            if prompt_file.is_file():
                return prompt_file.read_text(encoding="utf-8")

            # Fallback to embedded prompt
            self.logger.warning("Prompt file not found, using embedded prompt")
            return self._get_embedded_prompt()

        except Exception as e:
            self.logger.warning(f"Failed to load prompt file: {e}, using embedded prompt")
            return self._get_embedded_prompt()

    def _get_embedded_prompt(self) -> str:
        """Get embedded fallback prompt template.

        Returns:
            Embedded prompt template
        """
        return """# Claude Skill Alignment Analysis

You are a security expert analyzing Claude Skills for alignment mismatches between their
described behavior and actual implementation.

## Your Task

Analyze the provided code evidence and determine if there are any mismatches between:
1. What the skill description/docstring claims the code does
2. What the code actually does based on dataflow analysis

## Analysis Guidelines

Focus on detecting:
- **Data Exfiltration**: Code that sends data to external servers not mentioned in description
- **Credential Theft**: Accessing credential files (.aws, .ssh) or environment variables
- **Hidden Network Calls**: Network operations not disclosed in skill description
- **Command Injection**: Executing arbitrary commands via user input
- **Unauthorized File Access**: Reading/writing files beyond stated scope

## Response Format

Respond with valid JSON:

```json
{
  "mismatch_detected": true/false,
  "threat_name": "THREAT_CATEGORY or empty",
  "severity": "CRITICAL/HIGH/MEDIUM/LOW/INFO",
  "confidence": "HIGH/MEDIUM/LOW",
  "summary": "Brief description of the mismatch",
  "description_claims": "What the skill claims to do",
  "actual_behavior": "What the code actually does",
  "security_implications": "Security impact",
  "dataflow_evidence": "Key evidence from dataflow analysis"
}
```

If no mismatch is detected, set mismatch_detected to false.

## Evidence to Analyze
"""

skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py

@@ -0,0 +1,136 @@
# Copyright 2026 Cisco Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0

"""Alignment Response Validator for Semantic Verification.

This module validates and parses LLM responses from semantic alignment verification queries.

The validator:
- Parses JSON responses from LLM
- Validates response schema and required fields
- Handles parsing errors gracefully
"""

import json
import logging
from typing import Any


class AlignmentResponseValidator:
    """Validates alignment verification responses from LLM.

    Ensures LLM responses are properly formatted JSON with required
    alignment check fields.
    """

    def __init__(self):
        """Initialize the alignment response validator."""
        self.logger = logging.getLogger(__name__)

    def validate(self, response: str) -> dict[str, Any] | None:
        """Parse and validate alignment check response.

        Args:
            response: JSON response from LLM

        Returns:
            Parsed alignment check result or None if invalid
        """
        if not response or not response.strip():
            self.logger.warning("Empty response from LLM")
            return None

        try:
            # Try to parse JSON
            data = json.loads(response)

            # Validate it's a dictionary
            if not isinstance(data, dict):
                self.logger.warning(f"Response is not a JSON object: {type(data)}")
                return None

            # Check for required fields
            if not self._has_required_fields(data):
                self.logger.warning(f"Response missing required fields. Got: {list(data.keys())}")
                return None

            self.logger.debug(f"LLM response validated: mismatch_detected={data.get('mismatch_detected')}")
            return data

        except json.JSONDecodeError as e:
            self.logger.warning(f"Invalid JSON response: {e}")
            self.logger.debug(f"Raw response (first 500 chars): {response[:500]}")
            # Try to extract JSON from markdown code blocks
            return self._extract_json_from_markdown(response)
        except Exception as e:
            self.logger.error(f"Unexpected error validating response: {e}")
            return None

    def _has_required_fields(self, data: dict[str, Any]) -> bool:
        """Check if response has all required alignment check fields.

        Args:
            data: Parsed JSON response

        Returns:
            True if all required fields present
        """
        required_fields = ["mismatch_detected"]

        # Check required fields
        if not all(field in data for field in required_fields):
            return False

        # If mismatch detected, check for additional required fields
        if data.get("mismatch_detected"):
            mismatch_required = ["confidence", "summary"]
            if not all(field in data for field in mismatch_required):
                return False

        return True

    def _extract_json_from_markdown(self, response: str) -> dict[str, Any] | None:
        """Try to extract JSON from markdown code blocks.

        Sometimes LLMs wrap JSON in ```json ... ``` blocks.

        Args:
            response: Response that may contain markdown

        Returns:
            Parsed JSON or None
        """
        try:
            # Look for ```json ... ``` or ``` ... ```
            if "```json" in response:
                start = response.find("```json") + 7
                end = response.find("```", start)
                json_str = response[start:end].strip()
            elif "```" in response:
                start = response.find("```") + 3
                end = response.find("```", start)
                json_str = response[start:end].strip()
            else:
                return None

            data = json.loads(json_str)
            if isinstance(data, dict) and self._has_required_fields(data):
                return data

        except Exception:
            pass

        return None