cisco-ai-skill-scanner 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
- skillanalyzer/__init__.py +45 -0
- skillanalyzer/_version.py +34 -0
- skillanalyzer/api/__init__.py +25 -0
- skillanalyzer/api/api.py +34 -0
- skillanalyzer/api/api_cli.py +78 -0
- skillanalyzer/api/api_server.py +634 -0
- skillanalyzer/api/router.py +527 -0
- skillanalyzer/cli/__init__.py +25 -0
- skillanalyzer/cli/cli.py +816 -0
- skillanalyzer/config/__init__.py +26 -0
- skillanalyzer/config/config.py +149 -0
- skillanalyzer/config/config_parser.py +122 -0
- skillanalyzer/config/constants.py +85 -0
- skillanalyzer/core/__init__.py +24 -0
- skillanalyzer/core/analyzers/__init__.py +75 -0
- skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
- skillanalyzer/core/analyzers/base.py +53 -0
- skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
- skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
- skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
- skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
- skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
- skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
- skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
- skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
- skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
- skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
- skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
- skillanalyzer/core/analyzers/static.py +1105 -0
- skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
- skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
- skillanalyzer/core/exceptions.py +77 -0
- skillanalyzer/core/loader.py +377 -0
- skillanalyzer/core/models.py +300 -0
- skillanalyzer/core/reporters/__init__.py +26 -0
- skillanalyzer/core/reporters/json_reporter.py +65 -0
- skillanalyzer/core/reporters/markdown_reporter.py +209 -0
- skillanalyzer/core/reporters/sarif_reporter.py +246 -0
- skillanalyzer/core/reporters/table_reporter.py +195 -0
- skillanalyzer/core/rules/__init__.py +19 -0
- skillanalyzer/core/rules/patterns.py +165 -0
- skillanalyzer/core/rules/yara_scanner.py +157 -0
- skillanalyzer/core/scanner.py +437 -0
- skillanalyzer/core/static_analysis/__init__.py +27 -0
- skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
- skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
- skillanalyzer/core/static_analysis/context_extractor.py +742 -0
- skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
- skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
- skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
- skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
- skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
- skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
- skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
- skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
- skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
- skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
- skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
- skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
- skillanalyzer/core/static_analysis/types/__init__.py +36 -0
- skillanalyzer/data/__init__.py +30 -0
- skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
- skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
- skillanalyzer/data/prompts/llm_response_schema.json +71 -0
- skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
- skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
- skillanalyzer/data/prompts/unified_response_schema.md +97 -0
- skillanalyzer/data/rules/signatures.yaml +440 -0
- skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
- skillanalyzer/data/yara_rules/code_execution.yara +61 -0
- skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
- skillanalyzer/data/yara_rules/command_injection.yara +54 -0
- skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
- skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
- skillanalyzer/data/yara_rules/script_injection.yara +83 -0
- skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
- skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
- skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
- skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
- skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
- skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
- skillanalyzer/hooks/__init__.py +21 -0
- skillanalyzer/hooks/pre_commit.py +450 -0
- skillanalyzer/threats/__init__.py +25 -0
- skillanalyzer/threats/threats.py +480 -0
- skillanalyzer/utils/__init__.py +28 -0
- skillanalyzer/utils/command_utils.py +129 -0
- skillanalyzer/utils/di_container.py +154 -0
- skillanalyzer/utils/file_utils.py +86 -0
- skillanalyzer/utils/logging_config.py +96 -0
- skillanalyzer/utils/logging_utils.py +71 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""Interprocedural analysis for cross-file function tracking."""
|
|
18
|
+
|
|
19
|
+
from .call_graph_analyzer import CallGraph, CallGraphAnalyzer
|
|
20
|
+
|
|
21
|
+
__all__ = ["CallGraph", "CallGraphAnalyzer"]
|
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""Cross-file analysis for Claude Skills.
|
|
18
|
+
|
|
19
|
+
Tracks how function parameters flow through function calls across multiple files.
|
|
20
|
+
This enables detection of data exfiltration patterns that span multiple scripts.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import ast
|
|
24
|
+
import logging
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CallGraph:
|
|
30
|
+
"""Call graph for cross-file analysis.
|
|
31
|
+
|
|
32
|
+
Tracks function definitions and call relationships across multiple files.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
def __init__(self) -> None:
|
|
36
|
+
"""Initialize call graph."""
|
|
37
|
+
self.functions: dict[str, Any] = {} # full_name -> function node
|
|
38
|
+
self.calls: list[tuple] = [] # (caller, callee) pairs
|
|
39
|
+
self.entry_points: set[str] = set() # Skill entry point functions
|
|
40
|
+
|
|
41
|
+
def add_function(self, name: str, node: Any, file_path: Path, is_entry_point: bool = False) -> None:
|
|
42
|
+
"""Add a function definition.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
name: Function name
|
|
46
|
+
node: Function definition node
|
|
47
|
+
file_path: File containing the function
|
|
48
|
+
is_entry_point: Whether this is a skill entry point
|
|
49
|
+
"""
|
|
50
|
+
full_name = f"{file_path}::{name}"
|
|
51
|
+
self.functions[full_name] = node
|
|
52
|
+
if is_entry_point:
|
|
53
|
+
self.entry_points.add(full_name)
|
|
54
|
+
|
|
55
|
+
def add_call(self, caller: str, callee: str) -> None:
|
|
56
|
+
"""Add a function call edge.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
caller: Caller function name
|
|
60
|
+
callee: Callee function name
|
|
61
|
+
"""
|
|
62
|
+
self.calls.append((caller, callee))
|
|
63
|
+
|
|
64
|
+
def get_callees(self, func_name: str) -> list[str]:
|
|
65
|
+
"""Get functions called by a function.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
func_name: Function name
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
List of callee function names
|
|
72
|
+
"""
|
|
73
|
+
return [callee for caller, callee in self.calls if caller == func_name]
|
|
74
|
+
|
|
75
|
+
def get_entry_points(self) -> set[str]:
|
|
76
|
+
"""Get all entry point functions.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
Set of entry point function names
|
|
80
|
+
"""
|
|
81
|
+
return self.entry_points.copy()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class CallGraphAnalyzer:
|
|
85
|
+
"""Performs cross-file analysis for Claude Skills.
|
|
86
|
+
|
|
87
|
+
Tracks parameter flow from skill entry points through
|
|
88
|
+
the entire codebase across multiple files.
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
def __init__(self) -> None:
|
|
92
|
+
"""Initialize cross-file analyzer."""
|
|
93
|
+
self.call_graph = CallGraph()
|
|
94
|
+
self.analyzers: dict[Path, ast.Module] = {} # file -> AST
|
|
95
|
+
self.import_map: dict[Path, list[Path]] = {} # file -> imported files
|
|
96
|
+
self.logger = logging.getLogger(__name__)
|
|
97
|
+
|
|
98
|
+
def add_file(self, file_path: Path, source_code: str) -> None:
|
|
99
|
+
"""Add a file to the analysis.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
file_path: Path to the file
|
|
103
|
+
source_code: Source code content
|
|
104
|
+
"""
|
|
105
|
+
try:
|
|
106
|
+
tree = ast.parse(source_code)
|
|
107
|
+
self.analyzers[file_path] = tree
|
|
108
|
+
|
|
109
|
+
# Extract function definitions
|
|
110
|
+
self._extract_functions(file_path, tree)
|
|
111
|
+
|
|
112
|
+
# Extract imports
|
|
113
|
+
self._extract_imports(file_path, tree)
|
|
114
|
+
except SyntaxError as e:
|
|
115
|
+
self.logger.debug(f"Skipping unparseable file {file_path}: {e}")
|
|
116
|
+
|
|
117
|
+
def _extract_functions(self, file_path: Path, tree: ast.Module) -> None:
|
|
118
|
+
"""Extract function definitions from Python file.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
file_path: File path
|
|
122
|
+
tree: AST tree
|
|
123
|
+
"""
|
|
124
|
+
# Extract top-level functions
|
|
125
|
+
for node in tree.body:
|
|
126
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
127
|
+
# Check if it looks like an entry point (has main-like name or is decorated)
|
|
128
|
+
is_entry = self._is_entry_point(node)
|
|
129
|
+
self.call_graph.add_function(node.name, node, file_path, is_entry)
|
|
130
|
+
|
|
131
|
+
# Extract class methods
|
|
132
|
+
for node in tree.body:
|
|
133
|
+
if isinstance(node, ast.ClassDef):
|
|
134
|
+
class_name = node.name
|
|
135
|
+
for item in node.body:
|
|
136
|
+
if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
137
|
+
method_full_name = f"{class_name}.{item.name}"
|
|
138
|
+
self.call_graph.add_function(method_full_name, item, file_path, False)
|
|
139
|
+
|
|
140
|
+
def _is_entry_point(self, func_def: ast.FunctionDef) -> bool:
|
|
141
|
+
"""Check if function is a skill entry point.
|
|
142
|
+
|
|
143
|
+
Entry points are identified by:
|
|
144
|
+
- Function name starts with main, run, or execute
|
|
145
|
+
- Function has decorators (common pattern for skills)
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
func_def: Function definition node
|
|
149
|
+
|
|
150
|
+
Returns:
|
|
151
|
+
True if entry point
|
|
152
|
+
"""
|
|
153
|
+
# Check function name patterns
|
|
154
|
+
name_lower = func_def.name.lower()
|
|
155
|
+
if name_lower in ["main", "run", "execute", "process", "handle"]:
|
|
156
|
+
return True
|
|
157
|
+
if name_lower.startswith(("main_", "run_", "execute_", "process_", "handle_")):
|
|
158
|
+
return True
|
|
159
|
+
|
|
160
|
+
# Check for decorators (often indicate entry points)
|
|
161
|
+
if func_def.decorator_list:
|
|
162
|
+
return True
|
|
163
|
+
|
|
164
|
+
return False
|
|
165
|
+
|
|
166
|
+
def _extract_imports(self, file_path: Path, tree: ast.Module) -> None:
|
|
167
|
+
"""Extract import relationships.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
file_path: File path
|
|
171
|
+
tree: AST tree
|
|
172
|
+
"""
|
|
173
|
+
imported_files = []
|
|
174
|
+
|
|
175
|
+
for node in ast.walk(tree):
|
|
176
|
+
if isinstance(node, ast.Import):
|
|
177
|
+
for alias in node.names:
|
|
178
|
+
module_name = alias.name
|
|
179
|
+
imported_file = self._resolve_import(file_path, module_name)
|
|
180
|
+
if imported_file:
|
|
181
|
+
imported_files.append(imported_file)
|
|
182
|
+
elif isinstance(node, ast.ImportFrom):
|
|
183
|
+
if node.module:
|
|
184
|
+
imported_file = self._resolve_import(file_path, node.module)
|
|
185
|
+
if imported_file:
|
|
186
|
+
imported_files.append(imported_file)
|
|
187
|
+
|
|
188
|
+
self.import_map[file_path] = imported_files
|
|
189
|
+
|
|
190
|
+
def _resolve_import(self, from_file: Path, module_name: str) -> Path | None:
|
|
191
|
+
"""Resolve Python import to file path.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
from_file: File doing the import
|
|
195
|
+
module_name: Module name
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
Resolved file path or None
|
|
199
|
+
"""
|
|
200
|
+
module_parts = module_name.split(".")
|
|
201
|
+
current_dir = from_file.parent
|
|
202
|
+
|
|
203
|
+
# Try relative to current file
|
|
204
|
+
for i in range(len(module_parts), 0, -1):
|
|
205
|
+
potential_path = current_dir / "/".join(module_parts[:i])
|
|
206
|
+
|
|
207
|
+
# Try as file
|
|
208
|
+
py_file = potential_path.with_suffix(".py")
|
|
209
|
+
if py_file.exists():
|
|
210
|
+
return py_file
|
|
211
|
+
|
|
212
|
+
# Try as package
|
|
213
|
+
init_file = potential_path / "__init__.py"
|
|
214
|
+
if init_file.exists():
|
|
215
|
+
return init_file
|
|
216
|
+
|
|
217
|
+
return None
|
|
218
|
+
|
|
219
|
+
def build_call_graph(self) -> CallGraph:
|
|
220
|
+
"""Build the complete call graph.
|
|
221
|
+
|
|
222
|
+
Returns:
|
|
223
|
+
Call graph
|
|
224
|
+
"""
|
|
225
|
+
# Extract function calls from each file
|
|
226
|
+
for file_path, tree in self.analyzers.items():
|
|
227
|
+
self._extract_calls(file_path, tree)
|
|
228
|
+
|
|
229
|
+
return self.call_graph
|
|
230
|
+
|
|
231
|
+
def _extract_calls(self, file_path: Path, tree: ast.Module) -> None:
|
|
232
|
+
"""Extract function calls from Python file.
|
|
233
|
+
|
|
234
|
+
Args:
|
|
235
|
+
file_path: File path
|
|
236
|
+
tree: AST tree
|
|
237
|
+
"""
|
|
238
|
+
# Extract calls from top-level functions
|
|
239
|
+
for node in tree.body:
|
|
240
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
241
|
+
caller_name = f"{file_path}::{node.name}"
|
|
242
|
+
self._extract_calls_from_function(file_path, node, caller_name)
|
|
243
|
+
|
|
244
|
+
# Extract calls from class methods
|
|
245
|
+
for node in tree.body:
|
|
246
|
+
if isinstance(node, ast.ClassDef):
|
|
247
|
+
class_name = node.name
|
|
248
|
+
for item in node.body:
|
|
249
|
+
if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
250
|
+
caller_name = f"{file_path}::{class_name}.{item.name}"
|
|
251
|
+
self._extract_calls_from_function(file_path, item, caller_name)
|
|
252
|
+
|
|
253
|
+
def _extract_calls_from_function(self, file_path: Path, func_node: ast.FunctionDef, caller_name: str) -> None:
|
|
254
|
+
"""Extract calls from a single function.
|
|
255
|
+
|
|
256
|
+
Args:
|
|
257
|
+
file_path: File path
|
|
258
|
+
func_node: Function AST node
|
|
259
|
+
caller_name: Full caller name
|
|
260
|
+
"""
|
|
261
|
+
for node in ast.walk(func_node):
|
|
262
|
+
if isinstance(node, ast.Call):
|
|
263
|
+
callee_name = self._get_call_name(node)
|
|
264
|
+
|
|
265
|
+
# Try to resolve to full name
|
|
266
|
+
full_callee = self._resolve_call_target(file_path, callee_name)
|
|
267
|
+
|
|
268
|
+
if full_callee:
|
|
269
|
+
self.call_graph.add_call(caller_name, full_callee)
|
|
270
|
+
else:
|
|
271
|
+
# Add with partial name
|
|
272
|
+
self.call_graph.add_call(caller_name, callee_name)
|
|
273
|
+
|
|
274
|
+
def _get_call_name(self, node: ast.Call) -> str:
|
|
275
|
+
"""Get function call name.
|
|
276
|
+
|
|
277
|
+
Args:
|
|
278
|
+
node: Call node
|
|
279
|
+
|
|
280
|
+
Returns:
|
|
281
|
+
Function name
|
|
282
|
+
"""
|
|
283
|
+
if isinstance(node.func, ast.Name):
|
|
284
|
+
return node.func.id
|
|
285
|
+
elif isinstance(node.func, ast.Attribute):
|
|
286
|
+
parts = []
|
|
287
|
+
current = node.func
|
|
288
|
+
while isinstance(current, ast.Attribute):
|
|
289
|
+
parts.append(current.attr)
|
|
290
|
+
current = current.value
|
|
291
|
+
if isinstance(current, ast.Name):
|
|
292
|
+
parts.append(current.id)
|
|
293
|
+
return ".".join(reversed(parts))
|
|
294
|
+
try:
|
|
295
|
+
return ast.unparse(node.func)
|
|
296
|
+
except (AttributeError, TypeError, ValueError):
|
|
297
|
+
return "<unknown>"
|
|
298
|
+
|
|
299
|
+
def _resolve_call_target(self, file_path: Path, call_name: str) -> str | None:
|
|
300
|
+
"""Resolve a function call to its full qualified name.
|
|
301
|
+
|
|
302
|
+
Args:
|
|
303
|
+
file_path: File where call occurs
|
|
304
|
+
call_name: Function call name
|
|
305
|
+
|
|
306
|
+
Returns:
|
|
307
|
+
Full qualified name or None
|
|
308
|
+
"""
|
|
309
|
+
# Check if it's defined in the same file
|
|
310
|
+
for func_name in self.call_graph.functions.keys():
|
|
311
|
+
if func_name.endswith(f"::{call_name}"):
|
|
312
|
+
if func_name.startswith(str(file_path)):
|
|
313
|
+
return func_name
|
|
314
|
+
|
|
315
|
+
# Check imported files
|
|
316
|
+
if file_path in self.import_map:
|
|
317
|
+
for imported_file in self.import_map[file_path]:
|
|
318
|
+
potential_name = f"{imported_file}::{call_name}"
|
|
319
|
+
if potential_name in self.call_graph.functions:
|
|
320
|
+
return potential_name
|
|
321
|
+
|
|
322
|
+
return None
|
|
323
|
+
|
|
324
|
+
def get_reachable_functions(self, start_func: str) -> list[str]:
|
|
325
|
+
"""Get all functions reachable from a starting function.
|
|
326
|
+
|
|
327
|
+
Args:
|
|
328
|
+
start_func: Starting function
|
|
329
|
+
|
|
330
|
+
Returns:
|
|
331
|
+
List of reachable function names
|
|
332
|
+
"""
|
|
333
|
+
reachable = set()
|
|
334
|
+
to_visit = [start_func]
|
|
335
|
+
visited = set()
|
|
336
|
+
|
|
337
|
+
while to_visit:
|
|
338
|
+
current = to_visit.pop()
|
|
339
|
+
if current in visited:
|
|
340
|
+
continue
|
|
341
|
+
|
|
342
|
+
visited.add(current)
|
|
343
|
+
reachable.add(current)
|
|
344
|
+
|
|
345
|
+
# Add all callees
|
|
346
|
+
callees = self.call_graph.get_callees(current)
|
|
347
|
+
for callee in callees:
|
|
348
|
+
if callee not in visited:
|
|
349
|
+
to_visit.append(callee)
|
|
350
|
+
|
|
351
|
+
return list(reachable)
|
|
352
|
+
|
|
353
|
+
def analyze_parameter_flow_across_files(self, entry_point: str, param_names: list[str]) -> dict[str, Any]:
|
|
354
|
+
"""Analyze how parameters flow across files from an entry point.
|
|
355
|
+
|
|
356
|
+
Args:
|
|
357
|
+
entry_point: Entry point function name
|
|
358
|
+
param_names: Parameter names to track
|
|
359
|
+
|
|
360
|
+
Returns:
|
|
361
|
+
Dictionary with cross-file flow information
|
|
362
|
+
"""
|
|
363
|
+
# Get all reachable functions
|
|
364
|
+
reachable = self.get_reachable_functions(entry_point)
|
|
365
|
+
|
|
366
|
+
# Track parameter-influenced functions
|
|
367
|
+
param_influenced_funcs = set()
|
|
368
|
+
cross_file_flows = []
|
|
369
|
+
|
|
370
|
+
for func_name in reachable:
|
|
371
|
+
if func_name == entry_point:
|
|
372
|
+
continue
|
|
373
|
+
|
|
374
|
+
# Check if this function is called from entry point or influenced functions
|
|
375
|
+
for caller, callee in self.call_graph.calls:
|
|
376
|
+
if callee == func_name and (caller == entry_point or caller in param_influenced_funcs):
|
|
377
|
+
param_influenced_funcs.add(func_name)
|
|
378
|
+
|
|
379
|
+
# Extract file information
|
|
380
|
+
caller_file = caller.split("::")[0] if "::" in caller else "unknown"
|
|
381
|
+
callee_file = callee.split("::")[0] if "::" in callee else "unknown"
|
|
382
|
+
|
|
383
|
+
if caller_file != callee_file:
|
|
384
|
+
cross_file_flows.append(
|
|
385
|
+
{
|
|
386
|
+
"from_function": caller,
|
|
387
|
+
"to_function": callee,
|
|
388
|
+
"from_file": caller_file,
|
|
389
|
+
"to_file": callee_file,
|
|
390
|
+
}
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
return {
|
|
394
|
+
"reachable_functions": reachable,
|
|
395
|
+
"param_influenced_functions": list(param_influenced_funcs),
|
|
396
|
+
"cross_file_flows": cross_file_flows,
|
|
397
|
+
"total_files_involved": len(set(f.split("::")[0] for f in reachable if "::" in f)),
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
def get_all_files(self) -> list[Path]:
|
|
401
|
+
"""Get all files in the analysis.
|
|
402
|
+
|
|
403
|
+
Returns:
|
|
404
|
+
List of file paths
|
|
405
|
+
"""
|
|
406
|
+
return list(self.analyzers.keys())
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Cross-file correlation analyzer for detecting multi-step attacks.
|
|
19
|
+
|
|
20
|
+
Tracks how data flows across multiple Python files in a skill package.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from ..context_extractor import SkillScriptContext
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class CrossFileCorrelation:
    """A single threat correlated across several files in a skill package."""

    # Kind of multi-file pattern, e.g. "exfiltration_chain", "collection_pipeline"
    threat_type: str
    # Severity label, e.g. "HIGH" or "CRITICAL"
    severity: str
    # Every file participating in the correlated behavior
    files_involved: list[str] = field(default_factory=list)
    # Pattern-specific supporting data (per-stage file lists, etc.)
    evidence: dict[str, Any] = field(default_factory=dict)
    # Human-readable summary of the finding
    description: str = ""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CrossFileAnalyzer:
    """Correlates signals from multiple files to spot multi-step attacks.

    Example of the kind of chain it detects:
    1. File A: Collects credentials/env vars
    2. File B: Encodes data
    3. File C: Sends to network
    """

    def __init__(self):
        # file name -> extracted per-file context
        self.file_contexts: dict[str, "SkillScriptContext"] = {}
        # Findings produced by the most recent analyze_correlations() run
        self.correlations: list[CrossFileCorrelation] = []

    def add_file_context(self, file_name: str, context: "SkillScriptContext"):
        """Register one file's extracted context for later correlation."""
        self.file_contexts[file_name] = context

    def analyze_correlations(self) -> list[CrossFileCorrelation]:
        """Run every cross-file detector over the registered contexts.

        Returns:
            List of detected cross-file correlations
        """
        self.correlations = []

        # Collection → Encoding → Exfiltration chain
        self._detect_exfiltration_chain()
        # Credential access in one file + network in another
        self._detect_credential_network_separation()
        # Environment harvesting + network transmission
        self._detect_env_var_exfiltration_chain()

        return self.correlations

    def _detect_exfiltration_chain(self):
        """Detect a Collection → Encoding → Network chain across files.

        Stage membership:
        - collection: env-var or credential access
        - encoding: base64/encode-style calls
        - network: any network capability
        """
        contexts = self.file_contexts.items()
        collectors = [
            name for name, ctx in contexts
            if ctx.has_env_var_access or ctx.has_credential_access
        ]
        encoders = [
            name for name, ctx in contexts
            if any("base64" in call or "encode" in call for call in ctx.all_function_calls)
        ]
        transmitters = [name for name, ctx in contexts if ctx.has_network]

        # Require at least collection + transmission spread over 2+ files.
        if not (collectors and transmitters and len(self.file_contexts) > 1):
            return

        self.correlations.append(
            CrossFileCorrelation(
                threat_type="exfiltration_chain",
                severity="CRITICAL",
                files_involved=list(set(collectors + encoders + transmitters)),
                evidence={
                    "collection_files": collectors,
                    "encoding_files": encoders,
                    "network_files": transmitters,
                },
                description=f"Multi-file exfiltration chain detected: {', '.join(collectors)} collect data → {', '.join(encoders) if encoders else 'encode'} → {', '.join(transmitters)} transmit to network",
            )
        )

    def _detect_credential_network_separation(self):
        """Flag credential access living in a different file than network I/O.

        Splitting the two stages across files is a common evasion trick
        against single-file pattern matching.
        """
        credential_files = [n for n, c in self.file_contexts.items() if c.has_credential_access]
        network_files = [n for n, c in self.file_contexts.items() if c.has_network]

        if not credential_files or not network_files:
            return
        if set(credential_files) & set(network_files):
            # Both capabilities share a file — not the split-file pattern.
            return

        self.correlations.append(
            CrossFileCorrelation(
                threat_type="credential_network_separation",
                severity="HIGH",
                files_involved=credential_files + network_files,
                evidence={
                    "credential_files": credential_files,
                    "network_files": network_files,
                },
                description=f"Credential access ({', '.join(credential_files)}) separated from network transmission ({', '.join(network_files)}) - possible evasion technique",
            )
        )

    def _detect_env_var_exfiltration_chain(self):
        """Detect environment-variable harvesting combined with network access.

        Fires whether the two capabilities share a file or are split across
        files; the split variant gets a more explicit description.
        """
        env_var_files = [n for n, c in self.file_contexts.items() if c.has_env_var_access]
        network_files = [n for n, c in self.file_contexts.items() if c.has_network]

        if not env_var_files or not network_files:
            return

        if not set(env_var_files) & set(network_files):
            # Split across files — more deliberate, but same severity.
            severity = "CRITICAL"
            desc = f"Environment variable harvesting ({', '.join(env_var_files)}) separated from network transmission ({', '.join(network_files)}) across files"
        else:
            severity = "CRITICAL"
            desc = f"Environment variable access with network calls in {', '.join(env_var_files)}"

        self.correlations.append(
            CrossFileCorrelation(
                threat_type="env_var_exfiltration",
                severity=severity,
                files_involved=list(set(env_var_files + network_files)),
                evidence={
                    "env_var_files": env_var_files,
                    "network_files": network_files,
                },
                description=desc,
            )
        )

    def get_critical_correlations(self) -> list[CrossFileCorrelation]:
        """Return only the CRITICAL-severity findings."""
        return [c for c in self.correlations if c.severity == "CRITICAL"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2026 Cisco Systems, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
|
+
|
|
17
|
+
"""Python AST parsers for static analysis."""
|
|
18
|
+
|
|
19
|
+
from .python_parser import FunctionInfo, PythonParser
|
|
20
|
+
|
|
21
|
+
__all__ = ["PythonParser", "FunctionInfo"]
|