code2llm 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code2flow/__init__.py +47 -0
- code2flow/__main__.py +6 -0
- code2flow/analysis/__init__.py +23 -0
- code2flow/analysis/call_graph.py +210 -0
- code2flow/analysis/cfg.py +293 -0
- code2flow/analysis/coupling.py +77 -0
- code2flow/analysis/data_analysis.py +249 -0
- code2flow/analysis/dfg.py +224 -0
- code2flow/analysis/pipeline_detector.py +445 -0
- code2flow/analysis/side_effects.py +313 -0
- code2flow/analysis/smells.py +192 -0
- code2flow/analysis/type_inference.py +306 -0
- code2flow/cli.py +493 -0
- code2flow/core/__init__.py +36 -0
- code2flow/core/analyzer.py +765 -0
- code2flow/core/config.py +177 -0
- code2flow/core/models.py +194 -0
- code2flow/core/streaming_analyzer.py +666 -0
- code2flow/exporters/__init__.py +35 -0
- code2flow/exporters/base.py +13 -0
- code2flow/exporters/context_exporter.py +207 -0
- code2flow/exporters/flow_exporter.py +570 -0
- code2flow/exporters/json_exporter.py +17 -0
- code2flow/exporters/llm_exporter.py +12 -0
- code2flow/exporters/map_exporter.py +218 -0
- code2flow/exporters/mermaid_exporter.py +67 -0
- code2flow/exporters/toon.py +982 -0
- code2flow/exporters/yaml_exporter.py +108 -0
- code2flow/llm_flow_generator.py +451 -0
- code2flow/llm_task_generator.py +263 -0
- code2flow/mermaid_generator.py +481 -0
- code2flow/nlp/__init__.py +23 -0
- code2flow/nlp/config.py +174 -0
- code2flow/nlp/entity_resolution.py +326 -0
- code2flow/nlp/intent_matching.py +297 -0
- code2flow/nlp/normalization.py +122 -0
- code2flow/nlp/pipeline.py +388 -0
- code2flow/patterns/__init__.py +0 -0
- code2flow/patterns/detector.py +168 -0
- code2flow/refactor/__init__.py +0 -0
- code2flow/refactor/prompt_engine.py +150 -0
- code2flow/visualizers/__init__.py +0 -0
- code2flow/visualizers/graph.py +196 -0
- code2llm-0.3.7.dist-info/METADATA +604 -0
- code2llm-0.3.7.dist-info/RECORD +49 -0
- code2llm-0.3.7.dist-info/WHEEL +5 -0
- code2llm-0.3.7.dist-info/entry_points.txt +2 -0
- code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
- code2llm-0.3.7.dist-info/top_level.txt +1 -0
code2flow/core/config.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""Configuration and constants for code2flow."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import List, Set
|
|
5
|
+
from enum import Enum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AnalysisMode(str, Enum):
    """Closed set of analysis mode identifiers.

    Members mix in ``str``, so each one compares equal to its lowercase
    string value (for example ``AnalysisMode.STATIC == "static"``).
    """

    STATIC = "static"
    DYNAMIC = "dynamic"
    HYBRID = "hybrid"
    BEHAVIORAL = "behavioral"
    REVERSE = "reverse"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class PerformanceConfig:
    """Container for performance-related settings.

    This class only stores values; how each setting is applied is decided
    by the code that reads it, which is not part of this module.
    """

    # Caching
    enable_cache: bool = True
    cache_dir: str = ".code2flow_cache"
    cache_ttl_hours: int = 24

    # Parallelism
    parallel_workers: int = 4
    parallel_enabled: bool = True

    # Resource ceilings
    max_memory_mb: int = 2048
    max_nodes_per_file: int = 1000
    max_total_nodes: int = 10000
    max_edges: int = 50000

    # Switches that trade analysis detail for speed
    fast_mode: bool = False
    skip_data_flow: bool = False
    skip_pattern_detection: bool = False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _default_exclude_patterns() -> List[str]:
    """Build a fresh list of the wildcard patterns excluded by default.

    A new list is returned on every call so that instances never share
    (and cannot accidentally mutate) one another's defaults.
    """
    # NOTE(review): if these are matched fnmatch-style, "*test*" already
    # covers the three test-specific patterns at the end — confirm against
    # the code that consumes this list.
    return [
        "*test*",
        "*__pycache__*",
        "*.pyc",
        "*venv*",
        "*.venv*",
        "*node_modules*",
        "*.git*",
        "*build*",
        "*dist*",
        "*_test.py",
        "test_*.py",
        "conftest.py",
    ]


@dataclass
class FilterConfig:
    """Settings that narrow down what gets analysed."""

    # File-level selection
    exclude_tests: bool = True
    exclude_patterns: List[str] = field(default_factory=_default_exclude_patterns)
    include_patterns: List[str] = field(default_factory=list)

    # Function-level selection
    min_function_lines: int = 1
    skip_private: bool = False
    skip_properties: bool = True
    skip_accessors: bool = True
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class DepthConfig:
    """Maximum depths used to bound the different analyses.

    All four values are plain integers; this class performs no validation.
    """

    max_cfg_depth: int = 5
    max_call_depth: int = 3
    max_data_flow_depth: int = 2
    max_interprocedural_depth: int = 2
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class OutputConfig:
    """Settings that shape the generated output."""

    compact: bool = True
    include_source: bool = False
    max_label_length: int = 50
    group_by_module: bool = True
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class Config:
    """Top-level analysis configuration.

    Groups the specialised sub-configurations (performance, filters, depth,
    output) with a set of flat settings. Every mutable default is produced
    by a ``default_factory`` so each instance owns its own copy.
    """

    # Analysis mode
    mode: str = "hybrid"

    # Sub-configs for performance
    performance: PerformanceConfig = field(default_factory=PerformanceConfig)
    filters: FilterConfig = field(default_factory=FilterConfig)
    depth: DepthConfig = field(default_factory=DepthConfig)
    output: OutputConfig = field(default_factory=OutputConfig)

    # Legacy path limits (for compatibility)
    max_paths_per_function: int = 20
    max_depth_enumeration: int = 10
    max_depth_interprocedural: int = 3
    max_total_paths: int = 1000

    # Output settings
    output_formats: List[str] = field(
        default_factory=lambda: ["yaml", "mermaid", "png"]
    )
    output_dir: str = "output"

    # Visualization
    fig_size: tuple = (16, 12)
    dpi: int = 300
    layout: str = "sfdp"  # dot, neato, fdp, sfdp, circo, twopi

    # Pattern detection
    detect_state_machines: bool = True
    detect_recursion: bool = True
    detect_loops: bool = True

    # Dynamic analysis
    trace_runtime: bool = False
    skip_packages: Set[str] = field(
        default_factory=lambda: {
            'site-packages', 'dist-packages', 'venv', '.venv'
        }
    )

    # Logging
    verbose: bool = False
    quiet: bool = False
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Predefined fast configuration: static mode, with the skip flags enabled and
# tighter node/depth limits than the dataclass defaults.
FAST_CONFIG = Config(
    mode="static",
    layout="dot",
    performance=PerformanceConfig(
        fast_mode=True,
        parallel_enabled=True,
        parallel_workers=8,
        skip_data_flow=True,
        skip_pattern_detection=True,
        max_nodes_per_file=500,
        max_total_nodes=5000,
    ),
    filters=FilterConfig(
        exclude_tests=True,
        min_function_lines=1,
        skip_private=True,
        skip_properties=True,
        skip_accessors=True,
    ),
    depth=DepthConfig(
        max_cfg_depth=3,
        max_call_depth=2,
        max_data_flow_depth=1,
        max_interprocedural_depth=1,
    ),
    output=OutputConfig(include_source=False, compact=True),
)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Human-readable description for each analysis mode, keyed by the mode's
# string value.
ANALYSIS_MODES = {
    "static": "AST-based control and data flow analysis",
    "dynamic": "Runtime execution tracing",
    "hybrid": "Combined static + dynamic analysis",
    "behavioral": "Behavioral pattern extraction",
    "reverse": "Reverse engineering ready output",
}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Node type codes mapped to a short description of each.
NODE_TYPES = {
    "FUNC": "Function definition",
    "CALL": "Function call",
    "IF": "Conditional branch",
    "FOR": "For loop",
    "WHILE": "While loop",
    "ASSIGN": "Variable assignment",
    "RETURN": "Return statement",
    "ENTRY": "Entry point",
    "EXIT": "Exit point",
}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Hex colour per node type code, for visualization. FOR and WHILE
# deliberately share one colour value.
NODE_COLORS = {
    "FUNC": "#4CAF50",
    "CALL": "#2196F3",
    "IF": "#FF9800",
    "FOR": "#9C27B0",
    "WHILE": "#9C27B0",
    "ASSIGN": "#607D8B",
    "RETURN": "#E91E63",
    "ENTRY": "#00BCD4",
    "EXIT": "#F44336",
}
|
code2flow/core/models.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from dataclasses import dataclass, field, asdict
|
|
2
|
+
from typing import List, Dict, Set, Optional, Any
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BaseModel:
    """Base class that gives dataclass models a dictionary serialization.

    Subclasses are expected to be dataclasses: ``to_dict`` delegates to
    ``dataclasses.asdict``, which raises ``TypeError`` for anything else.
    """

    def to_dict(self, compact: bool = True) -> dict:
        """Convert this instance to a plain dictionary.

        Args:
            compact: When true (the default), drop entries whose value is
                ``None`` or an empty collection, and turn tuples and sets
                into lists. When false, return ``asdict`` output untouched.

        Returns:
            The (optionally compacted) dictionary form of the instance.
        """
        data = asdict(self)
        return self._filter_compact(data) if compact else data

    def _filter_compact(self, data: Any) -> Any:
        """Recursively drop ``None`` and empty collections from dict values.

        Dictionaries lose every entry whose value is ``None`` or an empty
        list, tuple, dict or set. Lists, tuples and sets are rebuilt as
        lists (a set keeps whatever order iteration yields). Any other
        value is returned unchanged.

        The emptiness check looks at the value *before* recursion, so a
        container that only becomes empty after filtering is kept.
        """
        if isinstance(data, dict):
            return {
                key: self._filter_compact(value)
                for key, value in data.items()
                if not self._is_droppable(value)
            }
        if isinstance(data, (list, tuple, set)):
            return [self._filter_compact(item) for item in data]
        return data

    @staticmethod
    def _is_droppable(value: Any) -> bool:
        """Return True for ``None`` and for empty list/tuple/dict/set values."""
        # Tuples are included so the emptiness test covers the same sequence
        # types that the recursive branch above treats as collections.
        if value is None:
            return True
        return isinstance(value, (list, tuple, dict, set)) and len(value) == 0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class FlowNode(BaseModel):
    """A single node of the control flow graph."""

    # Required identity
    id: str
    type: str  # FUNC, CALL, IF, FOR, WHILE, ASSIGN, RETURN, ENTRY, EXIT
    label: str

    # Optional source location
    function: Optional[str] = None
    file: Optional[str] = None
    line: Optional[int] = None
    column: Optional[int] = None

    # Optional annotations; each instance gets its own empty container
    conditions: List[str] = field(default_factory=list)
    data_flow: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class FlowEdge(BaseModel):
    """A directed edge of the control flow graph, from ``source`` to ``target``."""

    source: str
    target: str
    edge_type: str = "control"  # control, data, call
    label: Optional[str] = None
    conditions: List[str] = field(default_factory=list)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class FunctionInfo(BaseModel):
    """Record describing one function or method."""

    # Required identity and location
    name: str
    qualified_name: str
    file: str
    line: int

    # Optional location / ownership details
    column: int = 0
    module: str = ""
    class_name: Optional[str] = None

    # Classification flags
    is_method: bool = False
    is_private: bool = False
    is_property: bool = False

    # Signature details
    docstring: Optional[str] = None
    args: List[str] = field(default_factory=list)
    returns: Optional[str] = None
    decorators: List[str] = field(default_factory=list)

    # CFG info
    cfg_entry: Optional[str] = None
    cfg_exit: Optional[str] = None
    cfg_nodes: List[str] = field(default_factory=list)
    calls: List[str] = field(default_factory=list)
    called_by: List[str] = field(default_factory=list)

    # Advanced metrics (Sprint 3)
    complexity: Dict[str, Any] = field(default_factory=dict)  # Cyclomatic, Cognitive
    centrality: float = 0.0  # Betweenness Centrality
    reachability: str = "unknown"  # reachable, unreachable, unknown
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class ClassInfo(BaseModel):
    """Record describing one class."""

    # Required identity and location
    name: str
    qualified_name: str
    file: str
    line: int

    # Optional details
    module: str = ""
    bases: List[str] = field(default_factory=list)
    methods: List[str] = field(default_factory=list)
    docstring: Optional[str] = None
    is_state_machine: bool = False
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclass
class ModuleInfo(BaseModel):
    """Record describing one module or package."""

    name: str
    file: str
    is_package: bool = False

    # Contents; each instance starts with its own empty lists
    imports: List[str] = field(default_factory=list)
    functions: List[str] = field(default_factory=list)
    classes: List[str] = field(default_factory=list)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass
class Pattern(BaseModel):
    """A behavioral pattern found during analysis."""

    name: str
    type: str  # recursion, state_machine, factory, singleton, strategy, loop
    confidence: float  # 0.0 to 1.0

    # Optional details; each instance starts with its own empty containers
    functions: List[str] = field(default_factory=list)
    entry_points: List[str] = field(default_factory=list)
    exit_points: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@dataclass
class CodeSmell(BaseModel):
    """A code smell found during analysis."""

    name: str
    type: str  # god_function, feature_envy, etc.
    file: str
    line: int
    severity: float  # 0.0 to 1.0
    description: str
    context: Dict[str, Any] = field(default_factory=dict)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass
class Mutation(BaseModel):
    """A recorded mutation of a variable or object.

    Every field is required; there are no defaults.
    """

    variable: str
    file: str
    line: int
    type: str  # assign, aug_assign, method_call
    scope: str
    context: str
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@dataclass
class DataFlow(BaseModel):
    """Data flow record for a single variable."""

    variable: str
    # Stored as a set, so duplicates collapse and order is not preserved.
    dependencies: Set[str] = field(default_factory=set)
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@dataclass
class AnalysisResult(BaseModel):
    """Complete analysis result for a project.

    Every field has a default, so an empty result can be created with no
    arguments and filled in incrementally. Mutable fields use a
    ``default_factory`` so instances never share containers.
    """

    project_path: str = ""
    analysis_mode: str = "static"
    stats: Dict[str, int] = field(default_factory=dict)

    # Graph data
    nodes: Dict[str, FlowNode] = field(default_factory=dict)
    edges: List[FlowEdge] = field(default_factory=list)

    # Code structure
    modules: Dict[str, ModuleInfo] = field(default_factory=dict)
    classes: Dict[str, ClassInfo] = field(default_factory=dict)
    functions: Dict[str, FunctionInfo] = field(default_factory=dict)

    # Analysis results
    patterns: List[Pattern] = field(default_factory=list)
    # The default must be a dict to match the annotation; a list default
    # would break key-based access on a freshly created result.
    call_graph: Dict[str, List[str]] = field(default_factory=dict)
    entry_points: List[str] = field(default_factory=list)
    data_flows: Dict[str, DataFlow] = field(default_factory=dict)

    # Refactoring data
    metrics: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    smells: List[CodeSmell] = field(default_factory=list)
    coupling: Dict[str, Any] = field(default_factory=dict)
    mutations: List[Mutation] = field(default_factory=list)

    def get_function_count(self) -> int:
        """Return the number of entries in ``functions``."""
        return len(self.functions)

    def get_class_count(self) -> int:
        """Return the number of entries in ``classes``."""
        return len(self.classes)

    def get_node_count(self) -> int:
        """Return the number of CFG nodes stored in ``nodes``."""
        return len(self.nodes)

    def get_edge_count(self) -> int:
        """Return the number of edges stored in ``edges``."""
        return len(self.edges)
|