cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,742 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Context extractor for Claude Skills behavioral analysis.
19
+
20
+ Extracts comprehensive security context from skill scripts for LLM analysis.
21
+ """
22
+
23
+ import ast
24
+ from dataclasses import dataclass, field
25
+ from pathlib import Path
26
+ from typing import Any
27
+
28
+ from .dataflow.forward_analysis import ForwardDataflowAnalysis
29
+ from .parser.python_parser import FunctionInfo, PythonParser
30
+
31
+
32
@dataclass
class SkillScriptContext:
    """Aggregated security context extracted from a single skill script."""

    file_path: str
    functions: list[FunctionInfo]
    imports: list[str]
    dataflows: list[dict[str, Any]] = field(default_factory=list)  # Empty - pattern detection used instead

    # Security indicators (aggregated from all functions)
    has_network: bool = False
    has_file_ops: bool = False
    has_subprocess: bool = False
    has_eval_exec: bool = False
    has_credential_access: bool = False
    has_env_var_access: bool = False

    # Dangerous patterns (simple pattern matching results)
    dangerous_flows: list[dict[str, Any]] = field(default_factory=list)
    has_exfiltration_chain: bool = False
    has_injection_chain: bool = False

    # Evidence for LLM
    all_function_calls: list[str] = field(default_factory=list)
    all_string_literals: list[str] = field(default_factory=list)
    suspicious_urls: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Render this context as a plain dictionary for LLM prompt building."""
        # Assemble the nested sections first, then the top-level payload.
        indicators = {
            "has_network": self.has_network,
            "has_file_ops": self.has_file_ops,
            "has_subprocess": self.has_subprocess,
            "has_eval_exec": self.has_eval_exec,
            "has_credential_access": self.has_credential_access,
            "has_env_var_access": self.has_env_var_access,
        }
        patterns = {
            "exfiltration_chain": self.has_exfiltration_chain,
            "injection_chain": self.has_injection_chain,
            "dangerous_flow_count": len(self.dangerous_flows),
        }
        function_summaries = []
        for fn in self.functions:
            function_summaries.append(
                {
                    "name": fn.name,
                    "parameters": fn.parameters,
                    "has_network": fn.has_network_calls,
                    "has_file_ops": fn.has_file_operations,
                    "has_subprocess": fn.has_subprocess,
                    "has_eval_exec": fn.has_eval_exec,
                    "calls": fn.function_calls[:10],  # First 10
                }
            )
        return {
            "file_path": self.file_path,
            "function_count": len(self.functions),
            "imports": self.imports,
            "security_indicators": indicators,
            "dangerous_patterns": patterns,
            "functions": function_summaries,
            "suspicious_urls": self.suspicious_urls,
        }
92
+
93
+
94
@dataclass
class SkillFunctionContext:
    """Complete context for a single function (for alignment verification).

    This dataclass contains rich analysis data for a single function,
    including dataflow analysis, parameter tracking, and behavioral patterns.
    Used by the alignment verification layer to detect description/code mismatches.

    Instances are produced by ContextExtractor._extract_function_context; field
    contents mirror what that extractor emits (e.g. each function_calls entry
    carries "name"/"args"/"line" keys).
    """

    # Required fields (no defaults)
    name: str
    imports: list[str]  # module-level imports of the enclosing file
    function_calls: list[dict[str, Any]]  # each entry: {"name", "args", "line"}
    assignments: list[dict[str, Any]]  # each entry: {"variable", "value", "line"}
    control_flow: dict[str, Any]  # has_conditionals / has_loops / has_exception_handling flags
    parameter_flows: list[dict[str, Any]]  # All paths from parameters
    constants: dict[str, Any]  # name -> constant value for literal assignments
    variable_dependencies: dict[str, list[str]]  # assigned var -> names read in its RHS
    has_file_operations: bool
    has_network_operations: bool
    has_subprocess_calls: bool
    has_eval_exec: bool

    # Optional fields (with defaults)
    docstring: str | None = None
    parameters: list[dict[str, Any]] = field(default_factory=list)  # each: {"name"} plus optional "type"
    return_type: str | None = None
    line_number: int = 0

    # Cross-file analysis
    cross_file_calls: list[dict[str, Any]] = field(default_factory=list)
    reachable_functions: list[str] = field(default_factory=list)

    # High-value security indicators
    string_literals: list[str] = field(default_factory=list)  # truncated/deduped; capped at 20
    return_expressions: list[str] = field(default_factory=list)
    exception_handlers: list[dict[str, Any]] = field(default_factory=list)  # {"line", "exception_type", "is_silent"}
    env_var_access: list[str] = field(default_factory=list)  # e.g. "os.getenv('KEY')"

    # State manipulation
    global_writes: list[dict[str, Any]] = field(default_factory=list)
    attribute_access: list[dict[str, Any]] = field(default_factory=list)  # capped at 20 write records

    # Dataflow facts
    dataflow_summary: dict[str, Any] = field(default_factory=dict)  # statement/expression counts + complexity
139
+
140
+
141
+ class ContextExtractor:
142
+ """Extract comprehensive security context from skill scripts."""
143
+
144
+ SUSPICIOUS_DOMAINS = ["attacker.com", "evil.com", "malicious.com", "pastebin.com"]
145
+
146
+ # Legitimate domains that should NOT be flagged as suspicious
147
+ LEGITIMATE_DOMAINS = [
148
+ # Claude/Anthropic services
149
+ "api.anthropic.com",
150
+ "statsig.anthropic.com",
151
+ # Code repositories
152
+ "github.com",
153
+ "gitlab.com",
154
+ "bitbucket.org",
155
+ # Package registries
156
+ "registry.npmjs.org",
157
+ "npmjs.com",
158
+ "npmjs.org",
159
+ "yarnpkg.com",
160
+ "registry.yarnpkg.com",
161
+ "pypi.org",
162
+ "files.pythonhosted.org",
163
+ "pythonhosted.org",
164
+ # System packages
165
+ "archive.ubuntu.com",
166
+ "security.ubuntu.com",
167
+ # XML schemas (for OOXML document processing)
168
+ "schemas.microsoft.com",
169
+ "schemas.openxmlformats.org",
170
+ "www.w3.org",
171
+ "purl.org",
172
+ # Localhost and development
173
+ "localhost",
174
+ "127.0.0.1",
175
+ "0.0.0.0",
176
+ ]
177
+
178
+ def extract_context(self, file_path: Path, source_code: str) -> SkillScriptContext:
179
+ """
180
+ Extract complete security context from a script.
181
+
182
+ Args:
183
+ file_path: Path to the script file
184
+ source_code: Python source code
185
+
186
+ Returns:
187
+ SkillScriptContext with extracted information
188
+ """
189
+ # Parse with AST parser
190
+ parser = PythonParser(source_code)
191
+ if not parser.parse():
192
+ # Return empty context if parsing fails
193
+ return SkillScriptContext(file_path=str(file_path), functions=[], imports=[], dataflows=[])
194
+
195
+ # Aggregate security indicators
196
+ has_network = any(f.has_network_calls for f in parser.functions)
197
+ has_file_ops = any(f.has_file_operations for f in parser.functions)
198
+ has_subprocess = any(f.has_subprocess for f in parser.functions)
199
+ has_eval_exec = any(f.has_eval_exec for f in parser.functions)
200
+
201
+ # Use CFG-based ForwardDataflowAnalysis for script-level source detection and flow tracking
202
+ try:
203
+ forward_analyzer = ForwardDataflowAnalysis(parser, parameter_names=[], detect_sources=True)
204
+ script_flows = forward_analyzer.analyze_forward_flows()
205
+ except Exception as e:
206
+ import logging
207
+
208
+ logging.getLogger(__name__).warning(f"CFG-based script-level analysis failed: {e}")
209
+ script_flows = []
210
+
211
+ # Extract credential/env access from detected sources
212
+ has_credential_access = any(flow.parameter_name.startswith("credential_file:") for flow in script_flows)
213
+ has_env_var_access = any(flow.parameter_name.startswith("env_var:") for flow in script_flows)
214
+
215
+ # Extract dangerous flows
216
+ dangerous_flows = []
217
+ for flow in script_flows:
218
+ if flow.reaches_external:
219
+ source_type = "parameter"
220
+ if flow.parameter_name.startswith("credential_file:"):
221
+ source_type = "credential_file"
222
+ elif flow.parameter_name.startswith("env_var:"):
223
+ source_type = "env_var"
224
+
225
+ # Determine sink type from calls
226
+ sink_type = "external"
227
+ network_calls = ["requests", "urllib", "http", "socket", "post", "get"]
228
+ eval_calls = ["eval", "exec", "compile"]
229
+ if any(any(nc in call.lower() for nc in network_calls) for call in flow.reaches_calls):
230
+ sink_type = "network"
231
+ elif any(any(ec in call.lower() for ec in eval_calls) for call in flow.reaches_calls):
232
+ sink_type = "eval"
233
+
234
+ dangerous_flows.append(
235
+ {
236
+ "source_type": source_type,
237
+ "source_name": flow.parameter_name,
238
+ "sink_type": sink_type,
239
+ "sink_operation": ", ".join(flow.reaches_calls),
240
+ "is_dangerous": True,
241
+ }
242
+ )
243
+
244
+ has_exfiltration_chain = any(
245
+ flow.get("source_type") in ["credential_file", "env_var"] and flow.get("sink_type") == "network"
246
+ for flow in dangerous_flows
247
+ )
248
+ has_injection_chain = any(
249
+ flow.get("source_type") == "parameter" and flow.get("sink_type") == "eval" for flow in dangerous_flows
250
+ )
251
+
252
+ # Collect all function calls and strings
253
+ all_calls = []
254
+ all_strings = []
255
+ for func in parser.functions:
256
+ all_calls.extend(func.function_calls)
257
+ all_strings.extend(func.string_literals)
258
+
259
+ # Also collect module-level strings (class attributes, etc.)
260
+ all_strings.extend(parser.module_strings)
261
+
262
+ # Find suspicious URLs (filter out legitimate domains and docstrings)
263
+ suspicious_urls = []
264
+ for s in all_strings:
265
+ # Skip if not URL-like or contains newlines (docstrings)
266
+ if "\n" in s or not s.startswith("http"):
267
+ continue
268
+ # Skip if too long (likely docstring) or too short
269
+ if len(s) > 200 or len(s) < 10:
270
+ continue
271
+ # Skip if contains legitimate domain
272
+ if any(domain in s for domain in self.LEGITIMATE_DOMAINS):
273
+ continue
274
+ # Flag if contains known suspicious domain OR is generic http URL
275
+ if any(domain in s for domain in self.SUSPICIOUS_DOMAINS):
276
+ suspicious_urls.append(s)
277
+ # Generic URLs only if they look suspicious (not just schema URLs)
278
+ elif not any(schema in s for schema in ["schemas.", "www.w3.org", "xmlns"]):
279
+ suspicious_urls.append(s)
280
+
281
+ # Create context
282
+ context = SkillScriptContext(
283
+ file_path=str(file_path),
284
+ functions=parser.functions,
285
+ imports=parser.imports,
286
+ dataflows=[], # Empty - using pattern detection instead
287
+ has_network=has_network,
288
+ has_file_ops=has_file_ops,
289
+ has_subprocess=has_subprocess,
290
+ has_eval_exec=has_eval_exec,
291
+ has_credential_access=has_credential_access,
292
+ has_env_var_access=has_env_var_access,
293
+ dangerous_flows=dangerous_flows,
294
+ has_exfiltration_chain=has_exfiltration_chain,
295
+ has_injection_chain=has_injection_chain,
296
+ all_function_calls=list(set(all_calls)),
297
+ all_string_literals=all_strings,
298
+ suspicious_urls=suspicious_urls,
299
+ )
300
+
301
+ return context
302
+
303
+ def extract_function_contexts(self, file_path: Path, source_code: str) -> list[SkillFunctionContext]:
304
+ """Extract detailed context for each function in the source code.
305
+
306
+ Used by the alignment verification layer to analyze individual functions.
307
+
308
+ Args:
309
+ file_path: Path to the script file
310
+ source_code: Python source code
311
+
312
+ Returns:
313
+ List of SkillFunctionContext for each function
314
+ """
315
+ contexts = []
316
+
317
+ try:
318
+ tree = ast.parse(source_code)
319
+ except SyntaxError:
320
+ return contexts
321
+
322
+ # Parse with AST parser
323
+ parser = PythonParser(source_code)
324
+ if not parser.parse():
325
+ return contexts
326
+
327
+ # Extract module-level imports
328
+ imports = parser.imports
329
+
330
+ # Process each function
331
+ for node in ast.walk(tree):
332
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
333
+ context = self._extract_function_context(node, imports, source_code, file_path)
334
+ if context:
335
+ contexts.append(context)
336
+
337
+ return contexts
338
+
339
+ def _extract_function_context(
340
+ self, node: ast.FunctionDef, imports: list[str], source_code: str, file_path: Path
341
+ ) -> SkillFunctionContext:
342
+ """Extract detailed context for a single function.
343
+
344
+ Args:
345
+ node: Function AST node
346
+ imports: Module-level imports
347
+ source_code: Full source code
348
+ file_path: Path to the file
349
+
350
+ Returns:
351
+ SkillFunctionContext with extracted information
352
+ """
353
+ # Basic info
354
+ name = node.name
355
+ docstring = ast.get_docstring(node)
356
+ parameters = self._extract_parameters(node)
357
+ return_type = self._extract_return_type(node)
358
+ line_number = node.lineno
359
+
360
+ # Code structure
361
+ function_calls = self._extract_function_calls(node)
362
+ assignments = self._extract_assignments(node)
363
+ control_flow = self._analyze_control_flow(node)
364
+
365
+ # Parameter flow analysis
366
+ parameter_flows = self._analyze_parameter_flows(node, parameters)
367
+
368
+ # Constants
369
+ constants = self._extract_constants(node)
370
+
371
+ # Variable dependencies
372
+ var_deps = self._analyze_variable_dependencies(node)
373
+
374
+ # Behavioral patterns
375
+ has_file_ops = self._has_file_operations(node)
376
+ has_network_ops = self._has_network_operations(node)
377
+ has_subprocess = self._has_subprocess_calls(node)
378
+ has_eval_exec = self._has_eval_exec(node)
379
+
380
+ # High-value security indicators
381
+ string_literals = self._extract_string_literals(node)
382
+ return_expressions = self._extract_return_expressions(node)
383
+ exception_handlers = self._extract_exception_handlers(node)
384
+ env_var_access = self._extract_env_var_access(node)
385
+
386
+ # State manipulation
387
+ global_writes = self._extract_global_writes(node)
388
+ attribute_access = self._extract_attribute_access(node)
389
+
390
+ # Dataflow summary
391
+ dataflow_summary = self._create_dataflow_summary(node)
392
+
393
+ return SkillFunctionContext(
394
+ name=name,
395
+ docstring=docstring,
396
+ parameters=parameters,
397
+ return_type=return_type,
398
+ line_number=line_number,
399
+ imports=imports,
400
+ function_calls=function_calls,
401
+ assignments=assignments,
402
+ control_flow=control_flow,
403
+ parameter_flows=parameter_flows,
404
+ constants=constants,
405
+ variable_dependencies=var_deps,
406
+ has_file_operations=has_file_ops,
407
+ has_network_operations=has_network_ops,
408
+ has_subprocess_calls=has_subprocess,
409
+ has_eval_exec=has_eval_exec,
410
+ string_literals=string_literals,
411
+ return_expressions=return_expressions,
412
+ exception_handlers=exception_handlers,
413
+ env_var_access=env_var_access,
414
+ global_writes=global_writes,
415
+ attribute_access=attribute_access,
416
+ dataflow_summary=dataflow_summary,
417
+ )
418
+
419
+ def _extract_parameters(self, node: ast.FunctionDef) -> list[dict[str, Any]]:
420
+ """Extract function parameters with type hints."""
421
+ params = []
422
+ for arg in node.args.args:
423
+ param_info = {"name": arg.arg}
424
+ if arg.annotation:
425
+ try:
426
+ param_info["type"] = ast.unparse(arg.annotation)
427
+ except (AttributeError, TypeError, ValueError):
428
+ param_info["type"] = "<unknown>"
429
+ params.append(param_info)
430
+ return params
431
+
432
+ def _extract_return_type(self, node: ast.FunctionDef) -> str:
433
+ """Extract return type annotation."""
434
+ if node.returns:
435
+ try:
436
+ return ast.unparse(node.returns)
437
+ except (AttributeError, TypeError, ValueError):
438
+ return "<unknown>"
439
+ return None
440
+
441
+ def _extract_function_calls(self, node: ast.FunctionDef) -> list[dict[str, Any]]:
442
+ """Extract all function calls with arguments."""
443
+ calls = []
444
+ for child in ast.walk(node):
445
+ if isinstance(child, ast.Call):
446
+ args_list = []
447
+ for arg in child.args:
448
+ try:
449
+ args_list.append(ast.unparse(arg))
450
+ except (AttributeError, TypeError, ValueError):
451
+ args_list.append("<complex>")
452
+
453
+ call_info = {
454
+ "name": self._get_call_name(child),
455
+ "args": args_list,
456
+ "line": child.lineno if hasattr(child, "lineno") else 0,
457
+ }
458
+ calls.append(call_info)
459
+ return calls
460
+
461
+ def _get_call_name(self, node: ast.Call) -> str:
462
+ """Get function call name."""
463
+ if isinstance(node.func, ast.Name):
464
+ return node.func.id
465
+ elif isinstance(node.func, ast.Attribute):
466
+ parts = []
467
+ current = node.func
468
+ while isinstance(current, ast.Attribute):
469
+ parts.append(current.attr)
470
+ current = current.value
471
+ if isinstance(current, ast.Name):
472
+ parts.append(current.id)
473
+ return ".".join(reversed(parts))
474
+ try:
475
+ return ast.unparse(node.func)
476
+ except (AttributeError, TypeError, ValueError):
477
+ return "<unknown>"
478
+
479
+ def _extract_assignments(self, node: ast.FunctionDef) -> list[dict[str, Any]]:
480
+ """Extract all assignments."""
481
+ assignments = []
482
+ for child in ast.walk(node):
483
+ if isinstance(child, ast.Assign):
484
+ for target in child.targets:
485
+ if isinstance(target, ast.Name):
486
+ try:
487
+ value_str = ast.unparse(child.value)
488
+ except (AttributeError, TypeError, ValueError):
489
+ value_str = "<complex>"
490
+ assignments.append(
491
+ {
492
+ "variable": target.id,
493
+ "value": value_str,
494
+ "line": child.lineno if hasattr(child, "lineno") else 0,
495
+ }
496
+ )
497
+ return assignments
498
+
499
+ def _analyze_control_flow(self, node: ast.FunctionDef) -> dict[str, Any]:
500
+ """Analyze control flow structure."""
501
+ has_if = any(isinstance(n, ast.If) for n in ast.walk(node))
502
+ has_for = any(isinstance(n, (ast.For, ast.AsyncFor)) for n in ast.walk(node))
503
+ has_while = any(isinstance(n, ast.While) for n in ast.walk(node))
504
+ has_try = any(isinstance(n, ast.Try) for n in ast.walk(node))
505
+
506
+ return {
507
+ "has_conditionals": has_if,
508
+ "has_loops": has_for or has_while,
509
+ "has_exception_handling": has_try,
510
+ }
511
+
512
+ def _analyze_parameter_flows(self, node: ast.FunctionDef, parameters: list[dict[str, Any]]) -> list[dict[str, Any]]:
513
+ """Analyze how parameters flow through the function using CFG-based analysis.
514
+
515
+ Uses proper control flow graph and fixpoint analysis for accurate tracking
516
+ through branches, loops, and function calls.
517
+ """
518
+ flows = []
519
+ param_names = [p["name"] for p in parameters]
520
+
521
+ if not param_names:
522
+ return flows
523
+
524
+ # Extract function source code for parser
525
+ try:
526
+ func_source = ast.unparse(node) if hasattr(ast, "unparse") else None
527
+ except (AttributeError, TypeError, ValueError):
528
+ # Reconstruct from AST if unparse fails
529
+
530
+ param_str = ", ".join(p["name"] for p in parameters)
531
+ func_source = f"def {node.name}({param_str}):\n pass"
532
+
533
+ if not func_source:
534
+ return flows
535
+
536
+ # Create parser and run CFG-based forward analysis
537
+ parser = PythonParser(func_source)
538
+ if not parser.parse():
539
+ return flows
540
+
541
+ try:
542
+ forward_analyzer = ForwardDataflowAnalysis(parser, param_names)
543
+ flow_paths = forward_analyzer.analyze_forward_flows()
544
+
545
+ # Convert FlowPath objects to dict format
546
+ for flow_path in flow_paths:
547
+ flows.append(
548
+ {
549
+ "parameter": flow_path.parameter_name,
550
+ "operations": flow_path.operations,
551
+ "reaches_calls": flow_path.reaches_calls,
552
+ "reaches_assignments": flow_path.reaches_assignments,
553
+ "reaches_returns": flow_path.reaches_returns,
554
+ "reaches_external": flow_path.reaches_external,
555
+ }
556
+ )
557
+ except Exception as e:
558
+ # Log error but return empty flows (no fallback)
559
+ import logging
560
+
561
+ logging.getLogger(__name__).warning(f"CFG-based parameter flow analysis failed: {e}")
562
+ return flows
563
+
564
+ return flows
565
+
566
+ def _extract_constants(self, node: ast.FunctionDef) -> dict[str, Any]:
567
+ """Extract constant values."""
568
+ constants = {}
569
+ for child in ast.walk(node):
570
+ if isinstance(child, ast.Assign):
571
+ for target in child.targets:
572
+ if isinstance(target, ast.Name) and isinstance(child.value, ast.Constant):
573
+ constants[target.id] = child.value.value
574
+ return constants
575
+
576
+ def _analyze_variable_dependencies(self, node: ast.FunctionDef) -> dict[str, list[str]]:
577
+ """Analyze variable dependencies."""
578
+ dependencies = {}
579
+ for child in ast.walk(node):
580
+ if isinstance(child, ast.Assign):
581
+ for target in child.targets:
582
+ if isinstance(target, ast.Name):
583
+ deps = []
584
+ for name_node in ast.walk(child.value):
585
+ if isinstance(name_node, ast.Name):
586
+ deps.append(name_node.id)
587
+ dependencies[target.id] = deps
588
+ return dependencies
589
+
590
+ def _has_file_operations(self, node: ast.FunctionDef) -> bool:
591
+ """Check for file operations."""
592
+ file_patterns = ["open", "read", "write", "path", "file", "os.remove", "shutil"]
593
+ for child in ast.walk(node):
594
+ if isinstance(child, ast.Call):
595
+ call_name = self._get_call_name(child).lower()
596
+ if any(pattern in call_name for pattern in file_patterns):
597
+ return True
598
+ return False
599
+
600
+ def _has_network_operations(self, node: ast.FunctionDef) -> bool:
601
+ """Check for network operations."""
602
+ network_patterns = ["requests", "urllib", "http", "socket", "post", "get", "fetch"]
603
+ for child in ast.walk(node):
604
+ if isinstance(child, ast.Call):
605
+ call_name = self._get_call_name(child).lower()
606
+ if any(pattern in call_name for pattern in network_patterns):
607
+ return True
608
+ return False
609
+
610
+ def _has_subprocess_calls(self, node: ast.FunctionDef) -> bool:
611
+ """Check for subprocess calls."""
612
+ subprocess_patterns = ["subprocess", "os.system", "os.popen", "shell", "exec"]
613
+ for child in ast.walk(node):
614
+ if isinstance(child, ast.Call):
615
+ call_name = self._get_call_name(child).lower()
616
+ if any(pattern in call_name for pattern in subprocess_patterns):
617
+ return True
618
+ return False
619
+
620
+ def _has_eval_exec(self, node: ast.FunctionDef) -> bool:
621
+ """Check for eval/exec calls."""
622
+ for child in ast.walk(node):
623
+ if isinstance(child, ast.Call):
624
+ call_name = self._get_call_name(child)
625
+ if call_name in ["eval", "exec", "compile", "__import__"]:
626
+ return True
627
+ return False
628
+
629
+ def _extract_string_literals(self, node: ast.FunctionDef) -> list[str]:
630
+ """Extract all string literals from function."""
631
+ literals = []
632
+ for child in ast.walk(node):
633
+ if isinstance(child, ast.Constant) and isinstance(child.value, str):
634
+ literal = child.value[:200]
635
+ if literal and literal not in literals:
636
+ literals.append(literal)
637
+ return literals[:20]
638
+
639
+ def _extract_return_expressions(self, node: ast.FunctionDef) -> list[str]:
640
+ """Extract return expressions from function."""
641
+ returns = []
642
+ for child in ast.walk(node):
643
+ if isinstance(child, ast.Return) and child.value:
644
+ try:
645
+ return_expr = ast.unparse(child.value)[:100]
646
+ returns.append(return_expr)
647
+ except (AttributeError, TypeError, ValueError):
648
+ returns.append("<unparseable>")
649
+ return returns
650
+
651
+ def _extract_exception_handlers(self, node: ast.FunctionDef) -> list[dict[str, Any]]:
652
+ """Extract exception handling details."""
653
+ handlers = []
654
+ for child in ast.walk(node):
655
+ if isinstance(child, ast.ExceptHandler):
656
+ handler_info = {
657
+ "line": child.lineno,
658
+ "exception_type": ast.unparse(child.type) if child.type else "Exception",
659
+ "is_silent": len(child.body) == 1 and isinstance(child.body[0], ast.Pass),
660
+ }
661
+ handlers.append(handler_info)
662
+ return handlers
663
+
664
+ def _extract_env_var_access(self, node: ast.FunctionDef) -> list[str]:
665
+ """Extract environment variable accesses."""
666
+ env_accesses = []
667
+ for child in ast.walk(node):
668
+ if isinstance(child, ast.Call):
669
+ call_name = self._get_call_name(child)
670
+ if "environ" in call_name or "getenv" in call_name:
671
+ if child.args and isinstance(child.args[0], ast.Constant):
672
+ key = child.args[0].value
673
+ env_accesses.append(f"{call_name}('{key}')")
674
+ else:
675
+ env_accesses.append(call_name)
676
+ return env_accesses
677
+
678
+ def _extract_global_writes(self, node: ast.FunctionDef) -> list[dict[str, Any]]:
679
+ """Extract global variable writes."""
680
+ global_writes = []
681
+ global_vars = set()
682
+
683
+ for child in ast.walk(node):
684
+ if isinstance(child, ast.Global):
685
+ global_vars.update(child.names)
686
+
687
+ for child in ast.walk(node):
688
+ if isinstance(child, ast.Assign):
689
+ for target in child.targets:
690
+ if isinstance(target, ast.Name) and target.id in global_vars:
691
+ try:
692
+ value_str = ast.unparse(child.value)[:100]
693
+ except (AttributeError, TypeError, ValueError):
694
+ value_str = "<complex>"
695
+ global_writes.append({"variable": target.id, "value": value_str, "line": child.lineno})
696
+
697
+ return global_writes
698
+
699
+ def _extract_attribute_access(self, node: ast.FunctionDef) -> list[dict[str, Any]]:
700
+ """Extract attribute access patterns."""
701
+ attribute_ops = []
702
+
703
+ for child in ast.walk(node):
704
+ if isinstance(child, ast.Assign):
705
+ for target in child.targets:
706
+ if isinstance(target, ast.Attribute):
707
+ obj_name = ""
708
+ if isinstance(target.value, ast.Name):
709
+ obj_name = target.value.id
710
+ try:
711
+ value_str = ast.unparse(child.value)[:100]
712
+ except (AttributeError, TypeError, ValueError):
713
+ value_str = "<complex>"
714
+ attribute_ops.append(
715
+ {
716
+ "type": "write",
717
+ "object": obj_name,
718
+ "attribute": target.attr,
719
+ "value": value_str,
720
+ "line": child.lineno,
721
+ }
722
+ )
723
+
724
+ return attribute_ops[:20]
725
+
726
+ def _create_dataflow_summary(self, node: ast.FunctionDef) -> dict[str, Any]:
727
+ """Create dataflow summary."""
728
+ return {
729
+ "total_statements": len([n for n in ast.walk(node) if isinstance(n, ast.stmt)]),
730
+ "total_expressions": len([n for n in ast.walk(node) if isinstance(n, ast.expr)]),
731
+ "complexity": self._calculate_complexity(node),
732
+ }
733
+
734
+ def _calculate_complexity(self, node: ast.FunctionDef) -> int:
735
+ """Calculate cyclomatic complexity."""
736
+ complexity = 1
737
+ for child in ast.walk(node):
738
+ if isinstance(child, (ast.If, ast.For, ast.While, ast.ExceptHandler)):
739
+ complexity += 1
740
+ elif isinstance(child, ast.BoolOp):
741
+ complexity += len(child.values) - 1
742
+ return complexity