cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,21 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """Interprocedural analysis for cross-file function tracking."""
18
+
19
+ from .call_graph_analyzer import CallGraph, CallGraphAnalyzer
20
+
21
+ __all__ = ["CallGraph", "CallGraphAnalyzer"]
@@ -0,0 +1,406 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """Cross-file analysis for Claude Skills.
18
+
19
+ Tracks how function parameters flow through function calls across multiple files.
20
+ This enables detection of data exfiltration patterns that span multiple scripts.
21
+ """
22
+
23
+ import ast
24
+ import logging
25
+ from pathlib import Path
26
+ from typing import Any
27
+
28
+
29
class CallGraph:
    """Registry of function definitions and caller/callee edges.

    Functions are stored under the key ``"<file_path>::<name>"``. Call edges
    are kept as ``(caller, callee)`` tuples in the order they were added.
    """

    def __init__(self) -> None:
        """Create an empty graph."""
        self.functions: dict[str, Any] = {}  # qualified name -> definition node
        self.calls: list[tuple] = []  # (caller, callee) edges, insertion order
        self.entry_points: set[str] = set()  # qualified names flagged as entry points

    def add_function(self, name: str, node: Any, file_path: Path, is_entry_point: bool = False) -> None:
        """Register a function definition.

        Args:
            name: Function name (``Class.method`` for methods)
            node: Definition node to store for the function
            file_path: File that contains the definition
            is_entry_point: When true, also record the function as an entry point
        """
        qualified = "{}::{}".format(file_path, name)
        self.functions[qualified] = node
        if not is_entry_point:
            return
        self.entry_points.add(qualified)

    def add_call(self, caller: str, callee: str) -> None:
        """Record one call edge.

        Args:
            caller: Name of the calling function
            callee: Name of the called function
        """
        edge = (caller, callee)
        self.calls.append(edge)

    def get_callees(self, func_name: str) -> list[str]:
        """Return the callees recorded for ``func_name``.

        Args:
            func_name: Caller name to look up

        Returns:
            Callee names in the order their edges were added (duplicates kept)
        """
        matches = []
        for source, target in self.calls:
            if source == func_name:
                matches.append(target)
        return matches

    def get_entry_points(self) -> set[str]:
        """Return a copy of the entry point names.

        Returns:
            New set; mutating it does not affect the graph
        """
        return set(self.entry_points)
82
+
83
+
84
+ class CallGraphAnalyzer:
85
+ """Performs cross-file analysis for Claude Skills.
86
+
87
+ Tracks parameter flow from skill entry points through
88
+ the entire codebase across multiple files.
89
+ """
90
+
91
+ def __init__(self) -> None:
92
+ """Initialize cross-file analyzer."""
93
+ self.call_graph = CallGraph()
94
+ self.analyzers: dict[Path, ast.Module] = {} # file -> AST
95
+ self.import_map: dict[Path, list[Path]] = {} # file -> imported files
96
+ self.logger = logging.getLogger(__name__)
97
+
98
+ def add_file(self, file_path: Path, source_code: str) -> None:
99
+ """Add a file to the analysis.
100
+
101
+ Args:
102
+ file_path: Path to the file
103
+ source_code: Source code content
104
+ """
105
+ try:
106
+ tree = ast.parse(source_code)
107
+ self.analyzers[file_path] = tree
108
+
109
+ # Extract function definitions
110
+ self._extract_functions(file_path, tree)
111
+
112
+ # Extract imports
113
+ self._extract_imports(file_path, tree)
114
+ except SyntaxError as e:
115
+ self.logger.debug(f"Skipping unparseable file {file_path}: {e}")
116
+
117
+ def _extract_functions(self, file_path: Path, tree: ast.Module) -> None:
118
+ """Extract function definitions from Python file.
119
+
120
+ Args:
121
+ file_path: File path
122
+ tree: AST tree
123
+ """
124
+ # Extract top-level functions
125
+ for node in tree.body:
126
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
127
+ # Check if it looks like an entry point (has main-like name or is decorated)
128
+ is_entry = self._is_entry_point(node)
129
+ self.call_graph.add_function(node.name, node, file_path, is_entry)
130
+
131
+ # Extract class methods
132
+ for node in tree.body:
133
+ if isinstance(node, ast.ClassDef):
134
+ class_name = node.name
135
+ for item in node.body:
136
+ if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
137
+ method_full_name = f"{class_name}.{item.name}"
138
+ self.call_graph.add_function(method_full_name, item, file_path, False)
139
+
140
+ def _is_entry_point(self, func_def: ast.FunctionDef) -> bool:
141
+ """Check if function is a skill entry point.
142
+
143
+ Entry points are identified by:
144
+ - Function name starts with main, run, or execute
145
+ - Function has decorators (common pattern for skills)
146
+
147
+ Args:
148
+ func_def: Function definition node
149
+
150
+ Returns:
151
+ True if entry point
152
+ """
153
+ # Check function name patterns
154
+ name_lower = func_def.name.lower()
155
+ if name_lower in ["main", "run", "execute", "process", "handle"]:
156
+ return True
157
+ if name_lower.startswith(("main_", "run_", "execute_", "process_", "handle_")):
158
+ return True
159
+
160
+ # Check for decorators (often indicate entry points)
161
+ if func_def.decorator_list:
162
+ return True
163
+
164
+ return False
165
+
166
+ def _extract_imports(self, file_path: Path, tree: ast.Module) -> None:
167
+ """Extract import relationships.
168
+
169
+ Args:
170
+ file_path: File path
171
+ tree: AST tree
172
+ """
173
+ imported_files = []
174
+
175
+ for node in ast.walk(tree):
176
+ if isinstance(node, ast.Import):
177
+ for alias in node.names:
178
+ module_name = alias.name
179
+ imported_file = self._resolve_import(file_path, module_name)
180
+ if imported_file:
181
+ imported_files.append(imported_file)
182
+ elif isinstance(node, ast.ImportFrom):
183
+ if node.module:
184
+ imported_file = self._resolve_import(file_path, node.module)
185
+ if imported_file:
186
+ imported_files.append(imported_file)
187
+
188
+ self.import_map[file_path] = imported_files
189
+
190
+ def _resolve_import(self, from_file: Path, module_name: str) -> Path | None:
191
+ """Resolve Python import to file path.
192
+
193
+ Args:
194
+ from_file: File doing the import
195
+ module_name: Module name
196
+
197
+ Returns:
198
+ Resolved file path or None
199
+ """
200
+ module_parts = module_name.split(".")
201
+ current_dir = from_file.parent
202
+
203
+ # Try relative to current file
204
+ for i in range(len(module_parts), 0, -1):
205
+ potential_path = current_dir / "/".join(module_parts[:i])
206
+
207
+ # Try as file
208
+ py_file = potential_path.with_suffix(".py")
209
+ if py_file.exists():
210
+ return py_file
211
+
212
+ # Try as package
213
+ init_file = potential_path / "__init__.py"
214
+ if init_file.exists():
215
+ return init_file
216
+
217
+ return None
218
+
219
+ def build_call_graph(self) -> CallGraph:
220
+ """Build the complete call graph.
221
+
222
+ Returns:
223
+ Call graph
224
+ """
225
+ # Extract function calls from each file
226
+ for file_path, tree in self.analyzers.items():
227
+ self._extract_calls(file_path, tree)
228
+
229
+ return self.call_graph
230
+
231
+ def _extract_calls(self, file_path: Path, tree: ast.Module) -> None:
232
+ """Extract function calls from Python file.
233
+
234
+ Args:
235
+ file_path: File path
236
+ tree: AST tree
237
+ """
238
+ # Extract calls from top-level functions
239
+ for node in tree.body:
240
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
241
+ caller_name = f"{file_path}::{node.name}"
242
+ self._extract_calls_from_function(file_path, node, caller_name)
243
+
244
+ # Extract calls from class methods
245
+ for node in tree.body:
246
+ if isinstance(node, ast.ClassDef):
247
+ class_name = node.name
248
+ for item in node.body:
249
+ if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
250
+ caller_name = f"{file_path}::{class_name}.{item.name}"
251
+ self._extract_calls_from_function(file_path, item, caller_name)
252
+
253
+ def _extract_calls_from_function(self, file_path: Path, func_node: ast.FunctionDef, caller_name: str) -> None:
254
+ """Extract calls from a single function.
255
+
256
+ Args:
257
+ file_path: File path
258
+ func_node: Function AST node
259
+ caller_name: Full caller name
260
+ """
261
+ for node in ast.walk(func_node):
262
+ if isinstance(node, ast.Call):
263
+ callee_name = self._get_call_name(node)
264
+
265
+ # Try to resolve to full name
266
+ full_callee = self._resolve_call_target(file_path, callee_name)
267
+
268
+ if full_callee:
269
+ self.call_graph.add_call(caller_name, full_callee)
270
+ else:
271
+ # Add with partial name
272
+ self.call_graph.add_call(caller_name, callee_name)
273
+
274
+ def _get_call_name(self, node: ast.Call) -> str:
275
+ """Get function call name.
276
+
277
+ Args:
278
+ node: Call node
279
+
280
+ Returns:
281
+ Function name
282
+ """
283
+ if isinstance(node.func, ast.Name):
284
+ return node.func.id
285
+ elif isinstance(node.func, ast.Attribute):
286
+ parts = []
287
+ current = node.func
288
+ while isinstance(current, ast.Attribute):
289
+ parts.append(current.attr)
290
+ current = current.value
291
+ if isinstance(current, ast.Name):
292
+ parts.append(current.id)
293
+ return ".".join(reversed(parts))
294
+ try:
295
+ return ast.unparse(node.func)
296
+ except (AttributeError, TypeError, ValueError):
297
+ return "<unknown>"
298
+
299
+ def _resolve_call_target(self, file_path: Path, call_name: str) -> str | None:
300
+ """Resolve a function call to its full qualified name.
301
+
302
+ Args:
303
+ file_path: File where call occurs
304
+ call_name: Function call name
305
+
306
+ Returns:
307
+ Full qualified name or None
308
+ """
309
+ # Check if it's defined in the same file
310
+ for func_name in self.call_graph.functions.keys():
311
+ if func_name.endswith(f"::{call_name}"):
312
+ if func_name.startswith(str(file_path)):
313
+ return func_name
314
+
315
+ # Check imported files
316
+ if file_path in self.import_map:
317
+ for imported_file in self.import_map[file_path]:
318
+ potential_name = f"{imported_file}::{call_name}"
319
+ if potential_name in self.call_graph.functions:
320
+ return potential_name
321
+
322
+ return None
323
+
324
+ def get_reachable_functions(self, start_func: str) -> list[str]:
325
+ """Get all functions reachable from a starting function.
326
+
327
+ Args:
328
+ start_func: Starting function
329
+
330
+ Returns:
331
+ List of reachable function names
332
+ """
333
+ reachable = set()
334
+ to_visit = [start_func]
335
+ visited = set()
336
+
337
+ while to_visit:
338
+ current = to_visit.pop()
339
+ if current in visited:
340
+ continue
341
+
342
+ visited.add(current)
343
+ reachable.add(current)
344
+
345
+ # Add all callees
346
+ callees = self.call_graph.get_callees(current)
347
+ for callee in callees:
348
+ if callee not in visited:
349
+ to_visit.append(callee)
350
+
351
+ return list(reachable)
352
+
353
+ def analyze_parameter_flow_across_files(self, entry_point: str, param_names: list[str]) -> dict[str, Any]:
354
+ """Analyze how parameters flow across files from an entry point.
355
+
356
+ Args:
357
+ entry_point: Entry point function name
358
+ param_names: Parameter names to track
359
+
360
+ Returns:
361
+ Dictionary with cross-file flow information
362
+ """
363
+ # Get all reachable functions
364
+ reachable = self.get_reachable_functions(entry_point)
365
+
366
+ # Track parameter-influenced functions
367
+ param_influenced_funcs = set()
368
+ cross_file_flows = []
369
+
370
+ for func_name in reachable:
371
+ if func_name == entry_point:
372
+ continue
373
+
374
+ # Check if this function is called from entry point or influenced functions
375
+ for caller, callee in self.call_graph.calls:
376
+ if callee == func_name and (caller == entry_point or caller in param_influenced_funcs):
377
+ param_influenced_funcs.add(func_name)
378
+
379
+ # Extract file information
380
+ caller_file = caller.split("::")[0] if "::" in caller else "unknown"
381
+ callee_file = callee.split("::")[0] if "::" in callee else "unknown"
382
+
383
+ if caller_file != callee_file:
384
+ cross_file_flows.append(
385
+ {
386
+ "from_function": caller,
387
+ "to_function": callee,
388
+ "from_file": caller_file,
389
+ "to_file": callee_file,
390
+ }
391
+ )
392
+
393
+ return {
394
+ "reachable_functions": reachable,
395
+ "param_influenced_functions": list(param_influenced_funcs),
396
+ "cross_file_flows": cross_file_flows,
397
+ "total_files_involved": len(set(f.split("::")[0] for f in reachable if "::" in f)),
398
+ }
399
+
400
+ def get_all_files(self) -> list[Path]:
401
+ """Get all files in the analysis.
402
+
403
+ Returns:
404
+ List of file paths
405
+ """
406
+ return list(self.analyzers.keys())
@@ -0,0 +1,190 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Cross-file correlation analyzer for detecting multi-step attacks.
19
+
20
+ Tracks how data flows across multiple Python files in a skill package.
21
+ """
22
+
23
+ from dataclasses import dataclass, field
24
+ from typing import Any
25
+
26
+ from ..context_extractor import SkillScriptContext
27
+
28
+
29
@dataclass
class CrossFileCorrelation:
    """Represents a correlated threat across multiple files."""

    # Kind of correlation; CrossFileAnalyzer emits "exfiltration_chain",
    # "credential_network_separation" and "env_var_exfiltration".
    threat_type: str
    # Severity label; CrossFileAnalyzer emits "CRITICAL" or "HIGH".
    severity: str
    # Names of the files that take part in the correlation.
    files_involved: list[str] = field(default_factory=list)
    # Per-stage file lists backing the finding.
    evidence: dict[str, Any] = field(default_factory=dict)
    # Human-readable summary.
    description: str = ""


class CrossFileAnalyzer:
    """
    Analyzes correlations across multiple files in a skill package.

    Detects multi-step attacks like:
    1. File A: Collects credentials/env vars
    2. File B: Encodes data
    3. File C: Sends to network

    Each registered context is read for ``has_env_var_access``,
    ``has_credential_access``, ``has_network`` and ``all_function_calls``.
    """

    def __init__(self) -> None:
        self.file_contexts: dict[str, SkillScriptContext] = {}  # file name -> context
        self.correlations: list[CrossFileCorrelation] = []  # results of the last analysis

    @staticmethod
    def _unique(names: list[str]) -> list[str]:
        """Return ``names`` without duplicates, keeping first-seen order."""
        return list(dict.fromkeys(names))

    def add_file_context(self, file_name: str, context: "SkillScriptContext") -> None:
        """Add a file's context for correlation analysis.

        Adding the same ``file_name`` again replaces the earlier context.
        """
        self.file_contexts[file_name] = context

    def analyze_correlations(self) -> list[CrossFileCorrelation]:
        """
        Analyze all files together to find multi-step attack patterns.

        Results from any previous run are discarded.

        Returns:
            List of detected cross-file correlations
        """
        self.correlations = []

        # Pattern 1: Collection → Encoding → Exfiltration chain
        self._detect_exfiltration_chain()

        # Pattern 2: Credential access in one file + Network in another
        self._detect_credential_network_separation()

        # Pattern 3: Environment harvesting + Network transmission
        self._detect_env_var_exfiltration_chain()

        return self.correlations

    def _detect_exfiltration_chain(self) -> None:
        """
        Detect Collection → Encoding → Network chain across files.

        Pattern:
        - File A: has_env_var_access or has_credential_access
        - File B: has encoding (a call name containing "base64" or "encode"); optional
        - File C: has_network

        A finding is reported only when the participating files are more than
        one file, so a single file that both collects and transmits is not
        labelled a multi-file chain just because other files exist.
        """
        has_collection = []
        has_encoding = []
        has_network = []

        for file_name, context in self.file_contexts.items():
            if context.has_env_var_access or context.has_credential_access:
                has_collection.append(file_name)

            # Check for encoding operations
            if any("base64" in call or "encode" in call for call in context.all_function_calls):
                has_encoding.append(file_name)

            if context.has_network:
                has_network.append(file_name)

        # Order-preserving de-duplication keeps reports stable between runs.
        files_involved = self._unique(has_collection + has_encoding + has_network)

        if has_collection and has_network and len(files_involved) > 1:
            correlation = CrossFileCorrelation(
                threat_type="exfiltration_chain",
                severity="CRITICAL",
                files_involved=files_involved,
                evidence={
                    "collection_files": has_collection,
                    "encoding_files": has_encoding,
                    "network_files": has_network,
                },
                description=f"Multi-file exfiltration chain detected: {', '.join(has_collection)} collect data → {', '.join(has_encoding) if has_encoding else 'encode'} → {', '.join(has_network)} transmit to network",
            )
            self.correlations.append(correlation)

    def _detect_credential_network_separation(self) -> None:
        """
        Detect credential access separated from network calls.

        This is a common evasion technique: put credential access in one file
        and network transmission in another to avoid simple pattern detection.
        Reported only when no file has both credential access and network use.
        """
        credential_files = []
        network_files = []

        for file_name, context in self.file_contexts.items():
            if context.has_credential_access:
                credential_files.append(file_name)
            if context.has_network:
                network_files.append(file_name)

        # If credentials and network are in DIFFERENT files
        if credential_files and network_files and not set(credential_files) & set(network_files):
            correlation = CrossFileCorrelation(
                threat_type="credential_network_separation",
                severity="HIGH",
                files_involved=credential_files + network_files,
                evidence={
                    "credential_files": credential_files,
                    "network_files": network_files,
                },
                description=f"Credential access ({', '.join(credential_files)}) separated from network transmission ({', '.join(network_files)}) - possible evasion technique",
            )
            self.correlations.append(correlation)

    def _detect_env_var_exfiltration_chain(self) -> None:
        """
        Detect environment variable access combined with network use.

        Pattern:
        - File A: has_env_var_access
        - File B: has_network
        - Reported whether A and B are different files or the same file; only
          the description differs.
        """
        env_var_files = []
        network_files = []

        for file_name, context in self.file_contexts.items():
            if context.has_env_var_access:
                env_var_files.append(file_name)
            if context.has_network:
                network_files.append(file_name)

        if not (env_var_files and network_files):
            return

        if not set(env_var_files) & set(network_files):
            # Split across files
            desc = f"Environment variable harvesting ({', '.join(env_var_files)}) separated from network transmission ({', '.join(network_files)}) across files"
        else:
            # At least one file does both
            desc = f"Environment variable access with network calls in {', '.join(env_var_files)}"

        correlation = CrossFileCorrelation(
            threat_type="env_var_exfiltration",
            severity="CRITICAL",  # same severity for both layouts
            files_involved=self._unique(env_var_files + network_files),
            evidence={
                "env_var_files": env_var_files,
                "network_files": network_files,
            },
            description=desc,
        )
        self.correlations.append(correlation)

    def get_critical_correlations(self) -> list[CrossFileCorrelation]:
        """Get only CRITICAL severity correlations."""
        return [c for c in self.correlations if c.severity == "CRITICAL"]
@@ -0,0 +1,21 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """Python AST parsers for static analysis."""
18
+
19
+ from .python_parser import FunctionInfo, PythonParser
20
+
21
+ __all__ = ["PythonParser", "FunctionInfo"]