cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100):
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,439 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """Control Flow Graph (CFG) builder for dataflow analysis.
18
+
19
+ Builds control flow graphs from Python ASTs to enable accurate dataflow
20
+ analysis through control structures (if/else, loops, functions).
21
+ """
22
+
23
import ast
import logging
from collections import deque
from typing import Any, Generic, TypeVar

from ..parser.python_parser import PythonParser
28
+
29
+ T = TypeVar("T")
30
+
31
+
32
class CFGNode:
    """Control Flow Graph node.

    A node pairs one AST fragment with a numeric ID and a free-form label,
    and keeps direct references to its neighbouring nodes in both directions.
    """

    def __init__(self, node_id: int, ast_node: Any, label: str = "") -> None:
        """Initialize CFG node.

        Args:
            node_id: Unique node ID
            ast_node: Associated AST node
            label: Optional label
        """
        self.id = node_id
        self.ast_node = ast_node
        self.label = label
        # Both edge lists start empty and are independent list objects; they
        # are populated from outside the node when edges are added.
        self.predecessors: list[CFGNode] = []
        self.successors: list[CFGNode] = []

    def __repr__(self) -> str:
        """Return a short debug representation of the form ``CFGNode(id, label)``."""
        return f"CFGNode({self.id}, {self.label})"
52
+
53
+
54
class ControlFlowGraph:
    """Control Flow Graph.

    Holds every node in creation order together with optional entry and exit
    markers. Edges are not stored on the graph itself: they live in the
    ``successors`` / ``predecessors`` lists of the nodes they connect.
    """

    def __init__(self) -> None:
        """Initialize an empty CFG with no entry or exit node."""
        self.nodes: list[CFGNode] = []
        self.entry: CFGNode | None = None
        self.exit: CFGNode | None = None
        # ID handed to the next created node; incremented once per node, so
        # IDs are sequential starting at 0.
        self._node_counter = 0

    def create_node(self, ast_node: Any, label: str = "") -> CFGNode:
        """Create a new CFG node and register it with this graph.

        Args:
            ast_node: AST node
            label: Optional label

        Returns:
            New CFG node
        """
        node = CFGNode(self._node_counter, ast_node, label)
        self._node_counter += 1
        self.nodes.append(node)
        return node

    def add_edge(self, from_node: CFGNode, to_node: CFGNode) -> None:
        """Add a directed edge between two nodes.

        The edge is recorded on both endpoints. No de-duplication is done, so
        adding the same edge twice records it twice.

        Args:
            from_node: Source node
            to_node: Target node
        """
        from_node.successors.append(to_node)
        to_node.predecessors.append(from_node)

    def get_successors(self, node: CFGNode) -> list[CFGNode]:
        """Get successor nodes.

        Args:
            node: CFG node

        Returns:
            List of successor nodes (the node's own list, not a copy)
        """
        return node.successors

    def get_predecessors(self, node: CFGNode) -> list[CFGNode]:
        """Get predecessor nodes.

        Args:
            node: CFG node

        Returns:
            List of predecessor nodes (the node's own list, not a copy)
        """
        return node.predecessors
110
+
111
+
112
class DataFlowAnalyzer(Generic[T]):
    """Generic dataflow analysis framework.

    Builds a control flow graph from the parser's AST and runs a worklist
    fixed-point iteration over it. Subclasses supply the analysis by
    overriding ``transfer`` and ``merge``; the defaults here pass facts through
    unchanged and pick the first incoming fact.
    """

    def __init__(self, parser: PythonParser) -> None:
        """Initialize dataflow analyzer.

        Args:
            parser: Python parser instance
        """
        self.parser = parser
        self.cfg: ControlFlowGraph | None = None
        # Facts on entry to / exit from each node, keyed by CFG node ID.
        self.in_facts: dict[int, T] = {}
        self.out_facts: dict[int, T] = {}
        self.logger = logging.getLogger(__name__)

    def build_cfg(self) -> ControlFlowGraph:
        """Build Control Flow Graph from AST.

        Reads the AST from the parser's ``tree`` attribute. When no AST is
        available a warning is logged and a fresh empty graph is returned;
        ``self.cfg`` and the stored facts are left untouched in that case.

        Returns:
            Control Flow Graph
        """
        # Get AST from parser (PythonParser uses self.tree)
        ast_root = getattr(self.parser, "tree", None)
        if not ast_root:
            self.logger.warning("Cannot build CFG: no AST available. Call parser.parse() first.")
            return ControlFlowGraph()

        # Clear old state when building a new CFG to prevent state leakage
        self.in_facts.clear()
        self.out_facts.clear()

        cfg = ControlFlowGraph()
        self._build_python_cfg(ast_root, cfg)
        self.cfg = cfg
        return cfg

    def _build_python_cfg(self, node: ast.AST, cfg: ControlFlowGraph) -> CFGNode:
        """Build CFG for Python AST.

        Args:
            node: Python AST node
            cfg: Control Flow Graph

        Returns:
            Exit node of the sub-graph built for ``node`` (the last node that
            control reaches when the construct finishes)
        """
        _, last = self._build_fragment(node, cfg)
        return last

    def _chain_statements(self, stmts: list[ast.stmt], cfg: ControlFlowGraph, start: CFGNode) -> CFGNode:
        """Wire a statement list in sequence after ``start`` and return the tail.

        Each statement's predecessor is linked to the statement's *entry* node
        and the chain continues from its *exit* node, so compound statements
        (branches, loops, try blocks) are entered through their condition or
        entry node rather than bypassed.

        Args:
            stmts: Statements to append, in execution order
            cfg: Control Flow Graph
            start: Node that precedes the first statement

        Returns:
            Exit node of the last statement, or ``start`` if ``stmts`` is empty
        """
        tail = start
        for stmt in stmts:
            head, last = self._build_fragment(stmt, cfg)
            cfg.add_edge(tail, head)
            tail = last
        return tail

    def _build_fragment(self, node: ast.AST, cfg: ControlFlowGraph) -> tuple[CFGNode, CFGNode]:
        """Build the sub-graph for one AST node and return its (entry, exit) pair.

        Handles modules, (async) function definitions, ``if``, ``while``,
        ``for`` and ``try``; any other node becomes a single CFG node labelled
        with its AST class name, whose entry and exit are the same node.

        Constructs this builder does not model: the ``else`` clauses of loops
        and ``try`` are not visited, and ``return`` / ``break`` / ``continue``
        / ``raise`` are treated as ordinary fall-through statements.

        Args:
            node: Python AST node
            cfg: Control Flow Graph

        Returns:
            Tuple of (entry node, exit node) for the built sub-graph
        """
        if isinstance(node, ast.Module):
            entry = cfg.create_node(node, "entry")
            cfg.entry = entry
            tail = self._chain_statements(node.body, cfg, entry)
            exit_node = cfg.create_node(node, "exit")
            cfg.exit = exit_node
            cfg.add_edge(tail, exit_node)
            return entry, exit_node

        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # The function body is laid out inline at the definition site.
            entry = cfg.create_node(node, "func_entry")
            if cfg.entry is None:
                cfg.entry = entry
            tail = self._chain_statements(node.body, cfg, entry)
            exit_node = cfg.create_node(node, "func_exit")
            if cfg.exit is None:
                cfg.exit = exit_node
            cfg.add_edge(tail, exit_node)
            return entry, exit_node

        if isinstance(node, ast.If):
            cond_node = cfg.create_node(node.test, "if_cond")

            then_entry = cfg.create_node(node, "then_entry")
            cfg.add_edge(cond_node, then_entry)
            then_tail = self._chain_statements(node.body, cfg, then_entry)

            if node.orelse:
                else_entry = cfg.create_node(node, "else_entry")
                cfg.add_edge(cond_node, else_entry)
                else_tail = self._chain_statements(node.orelse, cfg, else_entry)

                merge = cfg.create_node(node, "if_merge")
                cfg.add_edge(then_tail, merge)
                cfg.add_edge(else_tail, merge)
            else:
                # Without an else branch the false path goes from the
                # condition straight to the merge point.
                merge = cfg.create_node(node, "if_merge")
                cfg.add_edge(then_tail, merge)
                cfg.add_edge(cond_node, merge)
            return cond_node, merge

        if isinstance(node, ast.While):
            cond_node = cfg.create_node(node.test, "while_cond")

            body_entry = cfg.create_node(node, "while_body")
            cfg.add_edge(cond_node, body_entry)
            body_tail = self._chain_statements(node.body, cfg, body_entry)

            # Back edge: after the body the condition is evaluated again.
            cfg.add_edge(body_tail, cond_node)

            exit_node = cfg.create_node(node, "while_exit")
            cfg.add_edge(cond_node, exit_node)
            return cond_node, exit_node

        if isinstance(node, ast.For):
            iter_node = cfg.create_node(node.iter, "for_iter")

            body_entry = cfg.create_node(node, "for_body")
            cfg.add_edge(iter_node, body_entry)
            body_tail = self._chain_statements(node.body, cfg, body_entry)

            # Back edge: after the body the iterator is advanced again.
            cfg.add_edge(body_tail, iter_node)

            exit_node = cfg.create_node(node, "for_exit")
            cfg.add_edge(iter_node, exit_node)
            return iter_node, exit_node

        if isinstance(node, ast.Try):
            # Handle try/except/finally blocks
            try_entry = cfg.create_node(node, "try_entry")
            body_tail = self._chain_statements(node.body, cfg, try_entry)

            # Exception handlers branch off the try entry and rejoin at the
            # tail of the try body.
            # NOTE(review): when the try body ends in a simple statement, that
            # statement's node doubles as the join point, so its transfer is
            # also applied to facts coming out of the handlers.
            for handler in node.handlers:
                handler_entry = cfg.create_node(handler, "except_entry")
                cfg.add_edge(try_entry, handler_entry)
                handler_tail = self._chain_statements(handler.body, cfg, handler_entry)
                cfg.add_edge(handler_tail, body_tail)

            # Finally block
            if node.finalbody:
                finally_entry = cfg.create_node(node, "finally_entry")
                cfg.add_edge(body_tail, finally_entry)
                finally_tail = self._chain_statements(node.finalbody, cfg, finally_entry)
                return try_entry, finally_tail

            return try_entry, body_tail

        single = cfg.create_node(node, type(node).__name__)
        return single, single

    @staticmethod
    def _effective_multiplier(cfg_size: int, base_multiplier: int) -> int:
        """Scale the iteration multiplier down as the CFG grows.

        Tiers (fraction of ``base_multiplier``): fewer than 20 nodes 1.0,
        fewer than 50 nodes 0.8, fewer than 100 nodes 0.6, fewer than 200
        nodes 0.4, otherwise 0.3. Fractions are truncated to an int.

        Args:
            cfg_size: Number of nodes in the CFG
            base_multiplier: Multiplier used for the smallest graphs

        Returns:
            Multiplier to apply to ``cfg_size`` to get the iteration budget
        """
        if cfg_size < 20:
            return base_multiplier
        if cfg_size < 50:
            scale = 0.8
        elif cfg_size < 100:
            scale = 0.6
        elif cfg_size < 200:
            scale = 0.4
        else:
            scale = 0.3
        return int(base_multiplier * scale)

    def analyze(self, initial_fact: T, forward: bool = True, max_iteration_multiplier: int = 1000) -> None:
        """Run dataflow analysis using worklist algorithm.

        Builds the CFG first if none has been built yet. Results are written
        to ``self.in_facts`` and ``self.out_facts``; nothing is returned. If
        the iteration budget is exhausted the loop stops early and the stored
        facts may not have reached a fixed point.

        Args:
            initial_fact: Initial dataflow fact. The same object is stored for
                every node before iteration starts.
            forward: True for forward analysis, False for backward
            max_iteration_multiplier: Base multiplier for max iterations (default: 1000).
                Max iterations = CFG nodes * effective multiplier, where the
                effective multiplier shrinks for larger CFGs.
        """
        if not self.cfg:
            self.build_cfg()

        if not self.cfg or not self.cfg.nodes:
            return

        # Clear facts dictionaries to ensure clean state (defensive programming)
        # This prevents any potential state leakage from previous analyses
        self.in_facts.clear()
        self.out_facts.clear()

        for node in self.cfg.nodes:
            self.in_facts[node.id] = initial_fact
            self.out_facts[node.id] = initial_fact

        # Every node is visited at least once. A deque gives O(1) pops from
        # the front; the ID set keeps a node from being queued twice.
        worklist = deque(self.cfg.nodes)
        in_worklist = {node.id for node in worklist}

        cfg_size = len(self.cfg.nodes)
        effective_multiplier = self._effective_multiplier(cfg_size, max_iteration_multiplier)
        max_iterations = cfg_size * effective_multiplier  # Safety limit

        iteration_count = 0
        while worklist:
            iteration_count += 1

            # Safety check to prevent infinite loops
            if iteration_count > max_iterations:
                # Log at debug level to reduce noise - this is expected for complex files
                self.logger.debug(
                    f"Dataflow analysis exceeded max iterations ({max_iterations:,} iterations, "
                    f"CFG size: {cfg_size} nodes). Analysis stopped at safety limit. "
                    "This is normal for complex control flow and analysis may be incomplete."
                )
                break

            node = worklist.popleft()
            in_worklist.discard(node.id)

            if forward:
                pred_facts = [self.out_facts[pred.id] for pred in node.predecessors]
                in_fact = self.merge(pred_facts) if pred_facts else initial_fact
                self.in_facts[node.id] = in_fact

                out_fact = self.transfer(node, in_fact)
                if out_fact != self.out_facts[node.id]:
                    self.out_facts[node.id] = out_fact
                    # The output changed, so every successor must be revisited.
                    for succ in node.successors:
                        if succ.id not in in_worklist:
                            worklist.append(succ)
                            in_worklist.add(succ.id)
            else:
                succ_facts = [self.in_facts[succ.id] for succ in node.successors]
                out_fact = self.merge(succ_facts) if succ_facts else initial_fact
                self.out_facts[node.id] = out_fact

                in_fact = self.transfer(node, out_fact)
                if in_fact != self.in_facts[node.id]:
                    self.in_facts[node.id] = in_fact
                    # The input changed, so every predecessor must be revisited.
                    for pred in node.predecessors:
                        if pred.id not in in_worklist:
                            worklist.append(pred)
                            in_worklist.add(pred.id)

    def transfer(self, node: CFGNode, in_fact: T) -> T:
        """Transfer function for dataflow analysis.

        The base implementation is the identity; subclasses override it to
        model the effect of ``node``.

        Args:
            node: CFG node
            in_fact: Input dataflow fact

        Returns:
            Output dataflow fact
        """
        return in_fact

    def merge(self, facts: list[T]) -> T:
        """Merge multiple dataflow facts.

        The base implementation returns the first fact and ignores the rest;
        subclasses are expected to override it with a real join.

        Args:
            facts: List of facts to merge

        Returns:
            Merged fact

        Raises:
            NotImplementedError: If ``facts`` is empty
        """
        if facts:
            return facts[0]
        raise NotImplementedError("merge must be implemented by subclass")

    def get_reaching_definitions(self, node: CFGNode) -> T:
        """Get reaching definitions at a node.

        Looks up the stored input fact for ``node``. If the node has no entry,
        falls back to the fact stored for node ID 0, or ``None`` when no facts
        are stored at all.

        Args:
            node: CFG node

        Returns:
            Dataflow fact
        """
        return self.in_facts.get(node.id, self.in_facts.get(0) if self.in_facts else None)  # type: ignore