code2flow-toon 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +17 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/smells.py +192 -0
  10. code2flow/cli.py +464 -0
  11. code2flow/core/__init__.py +36 -0
  12. code2flow/core/analyzer.py +765 -0
  13. code2flow/core/config.py +177 -0
  14. code2flow/core/models.py +194 -0
  15. code2flow/core/streaming_analyzer.py +666 -0
  16. code2flow/exporters/__init__.py +17 -0
  17. code2flow/exporters/base.py +13 -0
  18. code2flow/exporters/json_exporter.py +17 -0
  19. code2flow/exporters/llm_exporter.py +199 -0
  20. code2flow/exporters/mermaid_exporter.py +67 -0
  21. code2flow/exporters/toon.py +401 -0
  22. code2flow/exporters/yaml_exporter.py +108 -0
  23. code2flow/llm_flow_generator.py +451 -0
  24. code2flow/llm_task_generator.py +263 -0
  25. code2flow/mermaid_generator.py +481 -0
  26. code2flow/nlp/__init__.py +23 -0
  27. code2flow/nlp/config.py +174 -0
  28. code2flow/nlp/entity_resolution.py +326 -0
  29. code2flow/nlp/intent_matching.py +297 -0
  30. code2flow/nlp/normalization.py +122 -0
  31. code2flow/nlp/pipeline.py +388 -0
  32. code2flow/patterns/__init__.py +0 -0
  33. code2flow/patterns/detector.py +168 -0
  34. code2flow/refactor/__init__.py +0 -0
  35. code2flow/refactor/prompt_engine.py +150 -0
  36. code2flow/visualizers/__init__.py +0 -0
  37. code2flow/visualizers/graph.py +196 -0
  38. code2flow_toon-0.2.4.dist-info/METADATA +599 -0
  39. code2flow_toon-0.2.4.dist-info/RECORD +43 -0
  40. code2flow_toon-0.2.4.dist-info/WHEEL +5 -0
  41. code2flow_toon-0.2.4.dist-info/entry_points.txt +2 -0
  42. code2flow_toon-0.2.4.dist-info/licenses/LICENSE +201 -0
  43. code2flow_toon-0.2.4.dist-info/top_level.txt +1 -0
code2flow/cli.py ADDED
@@ -0,0 +1,464 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ code2flow - CLI for Python code flow analysis
4
+
5
+ Analyze control flow, data flow, and call graphs of Python codebases.
6
+ """
7
+
8
+ import argparse
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ from .core.config import Config, ANALYSIS_MODES
13
+ from .core.analyzer import ProjectAnalyzer
14
+ from .exporters import YAMLExporter, JSONExporter, MermaidExporter, LLMPromptExporter, ToonExporter
15
+ from .visualizers.graph import GraphVisualizer
16
+
17
+
18
def create_parser() -> argparse.ArgumentParser:
    """Create the CLI argument parser for the main ``code2flow`` command.

    Returns:
        argparse.ArgumentParser: parser with the positional ``source`` path and
        all analysis/export options registered.
    """
    parser = argparse.ArgumentParser(
        prog='code2flow',
        description='Analyze Python code control flow, data flow, and call graphs',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  code2flow /path/to/project              # Default: TOON format only
  code2flow /path/to/project -f all       # Generate all formats
  code2flow /path/to/project -f toon,yaml # TOON + YAML
  code2flow /path/to/project -f json,png  # JSON + PNG
  code2flow /path/to/project -m static -o ./analysis
  code2flow llm-flow                      # Generate LLM flow summary

Format Options:
  toon    - Optimized compact format (default)
  yaml    - Standard YAML format
  json    - Machine-readable JSON
  mermaid - Flowchart diagrams
  png     - Visual graphs
  all     - Generate all formats
'''
    )

    # Positional source argument first, optional for backward compatibility
    # (subcommands like `llm-flow` are dispatched before parsing in main()).
    parser.add_argument(
        'source',
        nargs='?',
        help='Path to Python source file or directory'
    )

    parser.add_argument(
        '-m', '--mode',
        choices=list(ANALYSIS_MODES.keys()),
        default='hybrid',
        # Plain string: the previous f-string had no placeholders (lint: F541).
        help='Analysis mode (default: hybrid)'
    )

    parser.add_argument(
        '-o', '--output',
        default='./code2flow_output',
        help='Output directory (default: ./code2flow_output)'
    )

    parser.add_argument(
        '-f', '--format',
        default='toon',
        help='Output formats: toon,yaml,json,mermaid,png,all (default: toon)'
    )

    parser.add_argument(
        '--full',
        action='store_true',
        help='Include all fields in output (including empty/null values)'
    )

    parser.add_argument(
        '--no-patterns',
        action='store_true',
        help='Disable pattern detection'
    )

    parser.add_argument(
        '--max-depth',
        type=int,
        default=10,
        help='Maximum analysis depth (default: 10)'
    )

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Verbose output'
    )

    parser.add_argument(
        '--no-png',
        action='store_true',
        help='Skip automatic PNG generation from Mermaid files'
    )

    parser.add_argument(
        '--strategy',
        choices=['quick', 'standard', 'deep'],
        default='standard',
        help='Analysis strategy: quick (fast overview), standard (balanced), deep (complete)'
    )

    parser.add_argument(
        '--streaming',
        action='store_true',
        help='Use streaming analysis with progress reporting'
    )

    parser.add_argument(
        '--incremental',
        action='store_true',
        help='Only analyze changed files (requires previous run)'
    )

    parser.add_argument(
        '--max-memory',
        type=int,
        default=1000,
        help='Max memory in MB (default: 1000)'
    )

    parser.add_argument(
        '--split-output',
        action='store_true',
        help='Split YAML output into multiple files (summary, functions, classes, modules, entry_points)'
    )

    parser.add_argument(
        '--separate-orphans',
        action='store_true',
        help='Separate consolidated project from orphaned/isolated functions into different folders'
    )

    parser.add_argument(
        '--data-flow',
        action='store_true',
        help='Export data flow analysis (pipelines, state patterns, dependencies, events)'
    )

    parser.add_argument(
        '--data-structures',
        action='store_true',
        help='Export data structure analysis (types, flows, optimization opportunities)'
    )

    parser.add_argument(
        '--refactor',
        action='store_true',
        help='Enable AI-driven refactoring analysis and prompt generation'
    )

    parser.add_argument(
        '--smell',
        help='Filter refactoring by specific code smell (e.g., god_function, feature_envy)'
    )

    parser.add_argument(
        '--llm-format',
        choices=['claude', 'gpt', 'markdown'],
        default='markdown',
        help='Format for refactoring prompts (default: markdown)'
    )

    return parser
169
+
170
+
171
def main():
    """Main CLI entry point.

    Dispatches the ``llm-flow`` and ``llm-context`` subcommands before regular
    argument parsing; otherwise runs the project analysis and exports the
    requested output formats.

    Returns:
        int: 0 on success. Exits with status 2 on missing arguments and 1 on
        analysis/export errors.
    """
    # Handle subcommands before argparse sees the arguments.
    if len(sys.argv) > 1 and sys.argv[1] == 'llm-flow':
        from .llm_flow_generator import main as llm_flow_main
        return llm_flow_main(sys.argv[2:])

    if len(sys.argv) > 1 and sys.argv[1] == 'llm-context':
        # Quick LLM context generation
        return generate_llm_context(sys.argv[2:])

    # For all other cases, use the regular parser.
    parser = create_parser()
    args = parser.parse_args()

    # `source` is optional at parse time (for subcommand dispatch), so it is
    # validated manually here for the default analysis behavior.
    if not args.source:
        print("Error: missing required argument: source", file=sys.stderr)
        print("Usage: code2flow <source> [options]", file=sys.stderr)
        print(" or: code2flow llm-flow [options]", file=sys.stderr)
        sys.exit(2)

    # Validate source path.
    source_path = Path(args.source)
    if not source_path.exists():
        print(f"Error: Source path not found: {source_path}", file=sys.stderr)
        sys.exit(1)

    # Create output directory.
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Configure analysis; pattern detectors are toggled together.
    config = Config(
        mode=args.mode,
        max_depth_enumeration=args.max_depth,
        detect_state_machines=not args.no_patterns,
        detect_recursion=not args.no_patterns,
        output_dir=str(output_dir)
    )

    if args.verbose:
        print(f"Analyzing: {source_path}")
        print(f"Mode: {args.mode}")
        print(f"Output: {output_dir}")

    # --- Run analysis -----------------------------------------------------
    try:
        if args.streaming or args.strategy in ('quick', 'deep'):
            # Use optimized streaming analyzer for progress reporting.
            from .core.streaming_analyzer import (
                StreamingAnalyzer, STRATEGY_QUICK,
                STRATEGY_STANDARD, STRATEGY_DEEP
            )

            strategy_map = {
                'quick': STRATEGY_QUICK,
                'standard': STRATEGY_STANDARD,
                'deep': STRATEGY_DEEP
            }
            strategy = strategy_map.get(args.strategy, STRATEGY_STANDARD)

            # Cap the in-memory file count from the memory budget.
            strategy.max_files_in_memory = min(
                strategy.max_files_in_memory,
                args.max_memory // 10  # Rough heuristic: ~10 MB per file
            )

            analyzer = StreamingAnalyzer(config, strategy)

            if args.verbose:
                def on_progress(update):
                    pct = update.get('percentage', 0)
                    print(f"\r[{pct:.0f}%] {update.get('message', '')}", end='', flush=True)
                analyzer.set_progress_callback(on_progress)

            print(f"Analyzing with {args.strategy} strategy...")
            for update in analyzer.analyze_streaming(str(source_path)):
                # Only 'complete' is acted upon; per-file updates drive the
                # progress callback above.
                if update['type'] == 'complete':
                    if args.verbose:
                        print()  # New line after progress output
                    print(f"Completed in {update.get('elapsed_seconds', 0):.1f}s")

            # Streaming currently yields progress only, so re-run the full
            # analyzer to obtain exportable results.
            # TODO: Modify streaming to accumulate results properly
            analyzer = ProjectAnalyzer(config)
            result = analyzer.analyze_project(str(source_path))

        else:
            # Use standard analyzer.
            analyzer = ProjectAnalyzer(config)
            result = analyzer.analyze_project(str(source_path))

        if args.verbose:
            print("\nAnalysis complete:")
            print(f" - Functions: {len(result.functions)}")
            print(f" - Classes: {len(result.classes)}")
            print(f" - CFG nodes: {len(result.nodes)}")
            print(f" - CFG edges: {len(result.edges)}")

    except Exception as e:
        print(f"Error during analysis: {e}", file=sys.stderr)
        sys.exit(1)

    # --- Export results ---------------------------------------------------
    formats = [f.strip() for f in args.format.split(',')]

    # 'all' expands to every supported format.
    if 'all' in formats:
        formats = ['toon', 'yaml', 'json', 'mermaid', 'png']

    try:
        if 'toon' in formats:
            exporter = ToonExporter()
            filepath = output_dir / 'analysis.toon'
            exporter.export(result, str(filepath))
            if args.verbose:
                print(f" - TOON: {filepath}")

        if 'yaml' in formats:
            exporter = YAMLExporter()
            if args.separate_orphans:
                # Create separated output (consolidated vs orphans).
                sep_dir = output_dir / 'separated'
                exporter.export_separated(result, str(sep_dir), compact=True)
                if args.verbose:
                    print(f" - YAML (separated): {sep_dir}/")
            elif args.split_output:
                # Create split output for large projects.
                split_dir = output_dir / 'split'
                exporter.export_split(result, str(split_dir), include_defaults=args.full)
                if args.verbose:
                    print(f" - YAML (split): {split_dir}/")
            else:
                filepath = output_dir / 'analysis.yaml'
                exporter.export(result, str(filepath), include_defaults=args.full)
                if args.verbose:
                    print(f" - YAML: {filepath}")

        if 'json' in formats:
            exporter = JSONExporter()
            filepath = output_dir / 'analysis.json'
            exporter.export(result, str(filepath), include_defaults=args.full)
            if args.verbose:
                print(f" - JSON: {filepath}")

        if 'mermaid' in formats:
            exporter = MermaidExporter()
            filepath = output_dir / 'flow.mmd'
            exporter.export(result, str(filepath))
            filepath = output_dir / 'calls.mmd'
            exporter.export_call_graph(result, str(filepath))
            filepath = output_dir / 'compact_flow.mmd'
            exporter.export_compact(result, str(filepath))
            if args.verbose:
                print(f" - Mermaid: {output_dir / '*.mmd'}")

            # Auto-generate PNG from Mermaid files (unless disabled).
            if not args.no_png:
                try:
                    from .mermaid_generator import generate_pngs
                    png_count = generate_pngs(output_dir, output_dir)
                    if args.verbose and png_count > 0:
                        print(f" - PNG: {png_count} files generated")
                except ImportError:
                    # Fallback to external script.
                    try:
                        import subprocess
                        script_path = Path(__file__).parent.parent / 'mermaid_to_png.py'
                        if script_path.exists():
                            # BUGFIX: bind to `proc`, not `result` — the old
                            # code clobbered the analysis result, breaking all
                            # later exports (LLM prompt, refactor, etc.).
                            proc = subprocess.run([
                                'python', str(script_path),
                                '--batch', str(output_dir), str(output_dir)
                            ], capture_output=True, text=True, timeout=60)
                            if proc.returncode == 0 and args.verbose:
                                print(f" - PNG: {output_dir / '*.png'}")
                    except Exception:
                        # Best-effort: PNG generation is optional tooling.
                        if args.verbose:
                            print(" - PNG: Skipped (install with: make install-mermaid)")
            elif args.verbose:
                print(" - PNG: Skipped (--no-png)")

        if 'png' in formats:
            visualizer = GraphVisualizer(result)
            filepath = output_dir / 'cfg.png'
            visualizer.visualize_cfg(str(filepath))
            filepath = output_dir / 'call_graph.png'
            visualizer.visualize_call_graph(str(filepath))
            if args.verbose:
                print(f" - PNG: {output_dir / '*.png'}")

        if args.data_structures:
            exporter = YAMLExporter()
            struct_path = output_dir / 'data_structures.yaml'
            exporter.export_data_structures(result, str(struct_path), compact=True)
            if args.verbose:
                print(f" - Data structures: {struct_path}")

        # Always generate LLM prompt.
        exporter = LLMPromptExporter()
        filepath = output_dir / 'llm_prompt.md'
        exporter.export(result, str(filepath))
        if args.verbose:
            print(f" - LLM prompt: {filepath}")

        # AI-driven refactoring prompts.
        if args.refactor:
            from .refactor.prompt_engine import PromptEngine
            prompt_engine = PromptEngine(result)
            prompts = prompt_engine.generate_prompts()

            if prompts:
                prompts_dir = output_dir / 'prompts'
                prompts_dir.mkdir(parents=True, exist_ok=True)

                # Filter by smell if requested (substring match on filename).
                if args.smell:
                    prompts = {k: v for k, v in prompts.items() if args.smell in k.lower()}

                for filename, content in prompts.items():
                    prompt_path = prompts_dir / filename
                    prompt_path.write_text(content)

                if args.verbose:
                    print(f" - Refactoring prompts: {prompts_dir}/ ({len(prompts)} files)")
            else:
                if args.verbose:
                    print(" - Refactoring: No code smells detected.")

    except Exception as e:
        print(f"Error during export: {e}", file=sys.stderr)
        sys.exit(1)

    if args.verbose:
        print(f"\nAll outputs saved to: {output_dir}")

    return 0
417
+
418
+
419
def generate_llm_context(args_list):
    """Quick command to generate LLM context only.

    Args:
        args_list: raw argument strings (everything after ``llm-context``).

    Returns:
        int: 0 on success, 1 if the source path does not exist.
    """
    import argparse

    sub_parser = argparse.ArgumentParser(
        prog='code2flow llm-context',
        description='Generate LLM-friendly context for a project'
    )
    sub_parser.add_argument('source', help='Path to Python project')
    sub_parser.add_argument('-o', '--output', default='./llm_context.md', help='Output file path')
    sub_parser.add_argument('-v', '--verbose', action='store_true', help='Verbose output')
    opts = sub_parser.parse_args(args_list)

    from pathlib import Path
    from . import ProjectAnalyzer, FAST_CONFIG
    from .exporters import LLMPromptExporter

    project_path = Path(opts.source)
    if not project_path.exists():
        print(f"Error: Source path not found: {project_path}", file=sys.stderr)
        return 1

    if opts.verbose:
        print(f"Generating LLM context for: {project_path}")

    # Fast config, with parallelism off for stability.
    FAST_CONFIG.performance.parallel_enabled = False

    analysis = ProjectAnalyzer(FAST_CONFIG).analyze_project(str(project_path))
    LLMPromptExporter().export(analysis, opts.output)

    # Summary for the user.
    print(f"\n✓ LLM context generated: {opts.output}")
    print(f" Functions: {len(analysis.functions)}")
    print(f" Classes: {len(analysis.classes)}")
    print(f" Modules: {len(analysis.modules)}")

    return 0
461
+
462
+
463
+ if __name__ == '__main__':
464
+ sys.exit(main())
@@ -0,0 +1,36 @@
1
+ """Core analysis components for code2flow."""
2
+
3
+ from .analyzer import ProjectAnalyzer, FileCache, FastFileFilter
4
+ from .streaming_analyzer import (
5
+ StreamingAnalyzer,
6
+ IncrementalAnalyzer,
7
+ ScanStrategy,
8
+ SmartPrioritizer,
9
+ STRATEGY_QUICK,
10
+ STRATEGY_STANDARD,
11
+ STRATEGY_DEEP
12
+ )
13
+ from .config import Config, FAST_CONFIG, PerformanceConfig, FilterConfig
14
+ from .models import (
15
+ AnalysisResult, FlowNode, FlowEdge,
16
+ FunctionInfo, ClassInfo, ModuleInfo, Pattern
17
+ )
18
+
19
+ __all__ = [
20
+ 'ProjectAnalyzer',
21
+ 'StreamingAnalyzer',
22
+ 'IncrementalAnalyzer',
23
+ 'ScanStrategy',
24
+ 'SmartPrioritizer',
25
+ 'STRATEGY_QUICK',
26
+ 'STRATEGY_STANDARD',
27
+ 'STRATEGY_DEEP',
28
+ 'FileCache',
29
+ 'FastFileFilter',
30
+ 'Config',
31
+ 'FAST_CONFIG',
32
+ 'AnalysisResult',
33
+ 'FunctionInfo',
34
+ 'ClassInfo',
35
+ 'ModuleInfo',
36
+ ]