code2llm 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +23 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/pipeline_detector.py +445 -0
  10. code2flow/analysis/side_effects.py +313 -0
  11. code2flow/analysis/smells.py +192 -0
  12. code2flow/analysis/type_inference.py +306 -0
  13. code2flow/cli.py +493 -0
  14. code2flow/core/__init__.py +36 -0
  15. code2flow/core/analyzer.py +765 -0
  16. code2flow/core/config.py +177 -0
  17. code2flow/core/models.py +194 -0
  18. code2flow/core/streaming_analyzer.py +666 -0
  19. code2flow/exporters/__init__.py +35 -0
  20. code2flow/exporters/base.py +13 -0
  21. code2flow/exporters/context_exporter.py +207 -0
  22. code2flow/exporters/flow_exporter.py +570 -0
  23. code2flow/exporters/json_exporter.py +17 -0
  24. code2flow/exporters/llm_exporter.py +12 -0
  25. code2flow/exporters/map_exporter.py +218 -0
  26. code2flow/exporters/mermaid_exporter.py +67 -0
  27. code2flow/exporters/toon.py +982 -0
  28. code2flow/exporters/yaml_exporter.py +108 -0
  29. code2flow/llm_flow_generator.py +451 -0
  30. code2flow/llm_task_generator.py +263 -0
  31. code2flow/mermaid_generator.py +481 -0
  32. code2flow/nlp/__init__.py +23 -0
  33. code2flow/nlp/config.py +174 -0
  34. code2flow/nlp/entity_resolution.py +326 -0
  35. code2flow/nlp/intent_matching.py +297 -0
  36. code2flow/nlp/normalization.py +122 -0
  37. code2flow/nlp/pipeline.py +388 -0
  38. code2flow/patterns/__init__.py +0 -0
  39. code2flow/patterns/detector.py +168 -0
  40. code2flow/refactor/__init__.py +0 -0
  41. code2flow/refactor/prompt_engine.py +150 -0
  42. code2flow/visualizers/__init__.py +0 -0
  43. code2flow/visualizers/graph.py +196 -0
  44. code2llm-0.3.7.dist-info/METADATA +604 -0
  45. code2llm-0.3.7.dist-info/RECORD +49 -0
  46. code2llm-0.3.7.dist-info/WHEEL +5 -0
  47. code2llm-0.3.7.dist-info/entry_points.txt +2 -0
  48. code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
  49. code2llm-0.3.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,177 @@
1
+ """Configuration and constants for code2flow."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Set
5
+ from enum import Enum
6
+
7
+
8
class AnalysisMode(str, Enum):
    """Enumeration of the analysis modes that can be selected.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``AnalysisMode.STATIC == "static"``).
    """

    STATIC = "static"
    DYNAMIC = "dynamic"
    HYBRID = "hybrid"
    BEHAVIORAL = "behavioral"
    REVERSE = "reverse"
15
+
16
+
17
@dataclass
class PerformanceConfig:
    """Settings that control performance-related behaviour of an analysis run.

    All fields have defaults, so ``PerformanceConfig()`` yields a usable
    configuration; override individual fields by keyword.
    """

    # --- caching ---
    enable_cache: bool = True
    cache_dir: str = ".code2flow_cache"
    cache_ttl_hours: int = 24

    # --- parallel execution ---
    parallel_workers: int = 4
    parallel_enabled: bool = True

    # --- resource ceilings ---
    max_memory_mb: int = 2048
    max_nodes_per_file: int = 1000
    max_total_nodes: int = 10000
    max_edges: int = 50000

    # --- switches that skip optional work ---
    fast_mode: bool = False
    skip_data_flow: bool = False
    skip_pattern_detection: bool = False
32
+
33
+
34
@dataclass
class FilterConfig:
    """Options that narrow down what gets analysed.

    The list-valued fields use ``default_factory`` so every instance owns
    its own list and mutating one instance never affects another.
    """

    exclude_tests: bool = True

    # Glob-style patterns; each instance receives a fresh copy of this list.
    exclude_patterns: List[str] = field(
        default_factory=lambda: [
            "*test*",
            "*__pycache__*",
            "*.pyc",
            "*venv*",
            "*.venv*",
            "*node_modules*",
            "*.git*",
            "*build*",
            "*dist*",
            "*_test.py",
            "test_*.py",
            "conftest.py",
        ]
    )
    include_patterns: List[str] = field(default_factory=list)

    min_function_lines: int = 1
    skip_private: bool = False
    skip_properties: bool = True
    skip_accessors: bool = True
48
+
49
+
50
@dataclass
class DepthConfig:
    """Depth limits applied to the different kinds of analysis."""

    max_cfg_depth: int = 5              # control-flow graph
    max_call_depth: int = 3             # call chains
    max_data_flow_depth: int = 2        # data flow
    max_interprocedural_depth: int = 2  # interprocedural analysis
57
+
58
+
59
@dataclass
class OutputConfig:
    """Options controlling how results are formatted on output."""

    compact: bool = True
    include_source: bool = False
    max_label_length: int = 50
    group_by_module: bool = True
66
+
67
+
68
@dataclass
class Config:
    """Top-level analysis configuration.

    Bundles the nested performance / filter / depth / output settings with
    the flat options below. Every field has a default, and mutable defaults
    are created through ``default_factory`` so instances never share them.
    """

    # Selected analysis mode, stored as a plain string.
    mode: str = "hybrid"

    # Nested configuration groups; each instance gets its own objects.
    performance: PerformanceConfig = field(default_factory=PerformanceConfig)
    filters: FilterConfig = field(default_factory=FilterConfig)
    depth: DepthConfig = field(default_factory=DepthConfig)
    output: OutputConfig = field(default_factory=OutputConfig)

    # Path-enumeration limits, kept for backward compatibility.
    max_paths_per_function: int = 20
    max_depth_enumeration: int = 10
    max_depth_interprocedural: int = 3
    max_total_paths: int = 1000

    # Where and in which formats results are written.
    output_formats: List[str] = field(
        default_factory=lambda: ["yaml", "mermaid", "png"]
    )
    output_dir: str = "output"

    # Rendering parameters for visualizations.
    fig_size: tuple = (16, 12)
    dpi: int = 300
    layout: str = "sfdp"  # one of: dot, neato, fdp, sfdp, circo, twopi

    # Toggles for the individual pattern detectors.
    detect_state_machines: bool = True
    detect_recursion: bool = True
    detect_loops: bool = True

    # Dynamic (runtime) analysis options.
    trace_runtime: bool = False
    skip_packages: Set[str] = field(
        default_factory=lambda: {
            "site-packages",
            "dist-packages",
            "venv",
            ".venv",
        }
    )

    # Console verbosity flags.
    verbose: bool = False
    quiet: bool = False
110
+
111
+
112
# Ready-made configuration preset: static mode with fast_mode enabled,
# data-flow and pattern detection skipped, and lower node/depth limits
# than the Config defaults.
FAST_CONFIG = Config(
    mode="static",
    performance=PerformanceConfig(
        fast_mode=True,
        skip_data_flow=True,
        skip_pattern_detection=True,
        parallel_enabled=True,
        parallel_workers=8,
        max_nodes_per_file=500,
        max_total_nodes=5000,
    ),
    filters=FilterConfig(
        exclude_tests=True,
        skip_private=True,
        skip_properties=True,
        skip_accessors=True,
        min_function_lines=1,
    ),
    depth=DepthConfig(
        max_cfg_depth=3,
        max_call_depth=2,
        max_data_flow_depth=1,
        max_interprocedural_depth=1,
    ),
    output=OutputConfig(
        compact=True,
        include_source=False,
    ),
    layout="dot",
)
140
+
141
+
142
# Human-readable description for each analysis mode, keyed by mode name.
ANALYSIS_MODES = {
    "static": "AST-based control and data flow analysis",
    "dynamic": "Runtime execution tracing",
    "hybrid": "Combined static + dynamic analysis",
    "behavioral": "Behavioral pattern extraction",
    "reverse": "Reverse engineering ready output",
}
150
+
151
+
152
# Description of every graph node type, keyed by its short type code.
NODE_TYPES = {
    "FUNC": "Function definition",
    "CALL": "Function call",
    "IF": "Conditional branch",
    "FOR": "For loop",
    "WHILE": "While loop",
    "ASSIGN": "Variable assignment",
    "RETURN": "Return statement",
    "ENTRY": "Entry point",
    "EXIT": "Exit point",
}
164
+
165
+
166
# Hex colour used when drawing each node type; FOR and WHILE share a colour.
NODE_COLORS = {
    "FUNC": "#4CAF50",
    "CALL": "#2196F3",
    "IF": "#FF9800",
    "FOR": "#9C27B0",
    "WHILE": "#9C27B0",
    "ASSIGN": "#607D8B",
    "RETURN": "#E91E63",
    "ENTRY": "#00BCD4",
    "EXIT": "#F44336",
}
@@ -0,0 +1,194 @@
1
+ from dataclasses import dataclass, field, asdict
2
+ from typing import List, Dict, Set, Optional, Any
3
+ from pathlib import Path
4
+
5
+
6
class BaseModel:
    """Mixin for dataclass models that adds dictionary serialization.

    Subclasses must be dataclasses: ``to_dict`` relies on
    ``dataclasses.asdict`` and raises ``TypeError`` otherwise.
    """

    def to_dict(self, compact: bool = True) -> dict:
        """Return the instance as a plain dictionary.

        Args:
            compact: When true (the default), drop ``None`` values and empty
                collections from every dictionary level and convert
                tuples/sets to lists. When false, return the unmodified
                ``dataclasses.asdict`` result.
        """
        data = asdict(self)
        if compact:
            return self._filter_compact(data)
        return data

    def _filter_compact(self, data: Any) -> Any:
        """Recursively strip ``None`` and empty collections from dict values.

        * dict: entries whose value is ``None``, or is a collection that is
          empty *after* being filtered itself, are removed.
        * list / tuple: returned as a list with each element filtered;
          elements are never removed, so positions are preserved.
        * set: returned as a list, sorted when the elements are orderable so
          that the output is deterministic.
        * anything else is returned unchanged.
        """
        if isinstance(data, dict):
            compacted = {}
            for key, value in data.items():
                if value is None:
                    continue
                value = self._filter_compact(value)
                # Check emptiness after recursion so that a nested dict made
                # empty by filtering is pruned as well. Tuples and sets have
                # already been converted to lists at this point.
                if isinstance(value, (list, dict)) and not value:
                    continue
                compacted[key] = value
            return compacted
        if isinstance(data, set):
            try:
                ordered = sorted(data)
            except TypeError:
                # Elements are not mutually orderable; keep iteration order.
                ordered = list(data)
            return [self._filter_compact(item) for item in ordered]
        if isinstance(data, (list, tuple)):
            return [self._filter_compact(item) for item in data]
        return data
26
+
27
+
28
@dataclass
class FlowNode(BaseModel):
    """A single node of the control flow graph."""

    # Required fields
    id: str
    # One of: FUNC, CALL, IF, FOR, WHILE, ASSIGN, RETURN, ENTRY, EXIT
    type: str
    label: str

    # Optional source location / ownership
    function: Optional[str] = None
    file: Optional[str] = None
    line: Optional[int] = None
    column: Optional[int] = None

    # Extra information attached to the node
    conditions: List[str] = field(default_factory=list)
    data_flow: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
41
+
42
+
43
@dataclass
class FlowEdge(BaseModel):
    """A directed edge of the control flow graph."""

    source: str
    target: str
    # One of: control, data, call
    edge_type: str = "control"
    label: Optional[str] = None
    conditions: List[str] = field(default_factory=list)
51
+
52
+
53
@dataclass
class FunctionInfo(BaseModel):
    """Record describing one function or method."""

    # --- identity and location (required) ---
    name: str
    qualified_name: str
    file: str
    line: int

    # --- optional descriptive attributes ---
    column: int = 0
    module: str = ""
    class_name: Optional[str] = None
    is_method: bool = False
    is_private: bool = False
    is_property: bool = False
    docstring: Optional[str] = None
    args: List[str] = field(default_factory=list)
    returns: Optional[str] = None
    decorators: List[str] = field(default_factory=list)

    # --- control-flow-graph and call information ---
    cfg_entry: Optional[str] = None
    cfg_exit: Optional[str] = None
    cfg_nodes: List[str] = field(default_factory=list)
    calls: List[str] = field(default_factory=list)
    called_by: List[str] = field(default_factory=list)

    # --- advanced metrics (Sprint 3) ---
    # Complexity figures (cyclomatic, cognitive)
    complexity: Dict[str, Any] = field(default_factory=dict)
    # Betweenness centrality
    centrality: float = 0.0
    # One of: reachable, unreachable, unknown
    reachability: str = "unknown"
82
+
83
+
84
@dataclass
class ClassInfo(BaseModel):
    """Record describing one class."""

    # Required identity and location
    name: str
    qualified_name: str
    file: str
    line: int

    # Optional details
    module: str = ""
    bases: List[str] = field(default_factory=list)
    methods: List[str] = field(default_factory=list)
    docstring: Optional[str] = None
    is_state_machine: bool = False
96
+
97
+
98
@dataclass
class ModuleInfo(BaseModel):
    """Record describing one module or package."""

    name: str
    file: str
    is_package: bool = False

    # Names collected for this module; each instance owns its own lists.
    imports: List[str] = field(default_factory=list)
    functions: List[str] = field(default_factory=list)
    classes: List[str] = field(default_factory=list)
107
+
108
+
109
@dataclass
class Pattern(BaseModel):
    """A behavioral pattern found during analysis."""

    name: str
    # One of: recursion, state_machine, factory, singleton, strategy, loop
    type: str
    # Confidence score in the range 0.0 to 1.0
    confidence: float

    functions: List[str] = field(default_factory=list)
    entry_points: List[str] = field(default_factory=list)
    exit_points: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
119
+
120
+
121
@dataclass
class CodeSmell(BaseModel):
    """A code smell found during analysis."""

    name: str
    # Smell category, e.g. god_function, feature_envy
    type: str
    file: str
    line: int
    # Severity score in the range 0.0 to 1.0
    severity: float
    description: str
    context: Dict[str, Any] = field(default_factory=dict)
131
+
132
+
133
@dataclass
class Mutation(BaseModel):
    """A recorded mutation of a variable or object. All fields are required."""

    variable: str
    file: str
    line: int
    # One of: assign, aug_assign, method_call
    type: str
    scope: str
    context: str
142
+
143
+
144
@dataclass
class DataFlow(BaseModel):
    """Data-flow record for a single variable."""

    variable: str
    # Stored as a set, so duplicates collapse and order is not preserved.
    dependencies: Set[str] = field(default_factory=set)
    metadata: Dict[str, Any] = field(default_factory=dict)
150
+
151
+
152
@dataclass
class AnalysisResult(BaseModel):
    """Complete analysis result for a project.

    Every field has a default, so an empty result can be created with
    ``AnalysisResult()`` and filled in incrementally. Mutable defaults are
    created per instance through ``default_factory``.
    """

    project_path: str = ""
    analysis_mode: str = "static"
    stats: Dict[str, int] = field(default_factory=dict)

    # Graph data
    nodes: Dict[str, FlowNode] = field(default_factory=dict)
    edges: List[FlowEdge] = field(default_factory=list)

    # Code structure
    modules: Dict[str, ModuleInfo] = field(default_factory=dict)
    classes: Dict[str, ClassInfo] = field(default_factory=dict)
    functions: Dict[str, FunctionInfo] = field(default_factory=dict)

    # Analysis results
    patterns: List[Pattern] = field(default_factory=list)
    # Annotated as a mapping, so the default must be an empty dict. The
    # previous ``default_factory=list`` produced ``[]``, which breaks
    # key assignment and ``.items()`` on a freshly created result.
    call_graph: Dict[str, List[str]] = field(default_factory=dict)
    entry_points: List[str] = field(default_factory=list)
    data_flows: Dict[str, DataFlow] = field(default_factory=dict)

    # Refactoring data
    metrics: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    smells: List[CodeSmell] = field(default_factory=list)
    coupling: Dict[str, Any] = field(default_factory=dict)
    mutations: List[Mutation] = field(default_factory=list)

    def get_function_count(self) -> int:
        """Return the number of entries in ``functions``."""
        return len(self.functions)

    def get_class_count(self) -> int:
        """Return the number of entries in ``classes``."""
        return len(self.classes)

    def get_node_count(self) -> int:
        """Return the number of entries in ``nodes``."""
        return len(self.nodes)

    def get_edge_count(self) -> int:
        """Return the number of entries in ``edges``."""
        return len(self.edges)