code2llm 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +23 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/pipeline_detector.py +445 -0
  10. code2flow/analysis/side_effects.py +313 -0
  11. code2flow/analysis/smells.py +192 -0
  12. code2flow/analysis/type_inference.py +306 -0
  13. code2flow/cli.py +493 -0
  14. code2flow/core/__init__.py +36 -0
  15. code2flow/core/analyzer.py +765 -0
  16. code2flow/core/config.py +177 -0
  17. code2flow/core/models.py +194 -0
  18. code2flow/core/streaming_analyzer.py +666 -0
  19. code2flow/exporters/__init__.py +35 -0
  20. code2flow/exporters/base.py +13 -0
  21. code2flow/exporters/context_exporter.py +207 -0
  22. code2flow/exporters/flow_exporter.py +570 -0
  23. code2flow/exporters/json_exporter.py +17 -0
  24. code2flow/exporters/llm_exporter.py +12 -0
  25. code2flow/exporters/map_exporter.py +218 -0
  26. code2flow/exporters/mermaid_exporter.py +67 -0
  27. code2flow/exporters/toon.py +982 -0
  28. code2flow/exporters/yaml_exporter.py +108 -0
  29. code2flow/llm_flow_generator.py +451 -0
  30. code2flow/llm_task_generator.py +263 -0
  31. code2flow/mermaid_generator.py +481 -0
  32. code2flow/nlp/__init__.py +23 -0
  33. code2flow/nlp/config.py +174 -0
  34. code2flow/nlp/entity_resolution.py +326 -0
  35. code2flow/nlp/intent_matching.py +297 -0
  36. code2flow/nlp/normalization.py +122 -0
  37. code2flow/nlp/pipeline.py +388 -0
  38. code2flow/patterns/__init__.py +0 -0
  39. code2flow/patterns/detector.py +168 -0
  40. code2flow/refactor/__init__.py +0 -0
  41. code2flow/refactor/prompt_engine.py +150 -0
  42. code2flow/visualizers/__init__.py +0 -0
  43. code2flow/visualizers/graph.py +196 -0
  44. code2llm-0.3.7.dist-info/METADATA +604 -0
  45. code2llm-0.3.7.dist-info/RECORD +49 -0
  46. code2llm-0.3.7.dist-info/WHEEL +5 -0
  47. code2llm-0.3.7.dist-info/entry_points.txt +2 -0
  48. code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
  49. code2llm-0.3.7.dist-info/top_level.txt +1 -0
code2flow/cli.py ADDED
@@ -0,0 +1,493 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ code2flow - CLI for Python code flow analysis
4
+
5
+ Analyze control flow, data flow, and call graphs of Python codebases.
6
+ """
7
+
8
+ import argparse
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ from .core.config import Config, ANALYSIS_MODES
13
+ from .core.analyzer import ProjectAnalyzer
14
+ from .exporters import (
15
+ YAMLExporter, JSONExporter, MermaidExporter,
16
+ ContextExporter, LLMPromptExporter,
17
+ ToonExporter, MapExporter, FlowExporter,
18
+ )
19
+ from .visualizers.graph import GraphVisualizer
20
+
21
+
22
def create_parser() -> argparse.ArgumentParser:
    """Create the CLI argument parser for the main ``code2flow`` command.

    Returns:
        argparse.ArgumentParser: Parser accepting an optional positional
        ``source`` path plus analysis, output-format, and refactoring flags.
    """
    parser = argparse.ArgumentParser(
        prog='code2flow',
        description='Analyze Python code control flow, data flow, and call graphs',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  code2flow /path/to/project                  # Default: TOON format only
  code2flow /path/to/project -f all           # Generate all formats
  code2flow /path/to/project -f toon,map,flow # Diagnostics + structure + data-flow
  code2flow /path/to/project -f context       # LLM narrative context
  code2flow /path/to/project -m static -o ./analysis
  code2flow llm-flow                          # Generate LLM flow summary

Format Options:
  toon    - Health diagnostics (analysis.toon) — default
  map     - Structural map (map.toon) — modules, imports, signatures
  flow    - Data-flow analysis (flow.toon) — pipelines, contracts, types
  context - LLM narrative (context.md) — architecture summary
  yaml    - Standard YAML format
  json    - Machine-readable JSON
  mermaid - Flowchart diagrams
  png     - Visual graphs
  all     - Generate all formats
'''
    )

    # Positional source argument is optional so that subcommand-style
    # invocations (llm-flow, llm-context) can be intercepted in main().
    parser.add_argument(
        'source',
        nargs='?',
        help='Path to Python source file or directory'
    )

    parser.add_argument(
        '-m', '--mode',
        choices=list(ANALYSIS_MODES.keys()),
        default='hybrid',
        # Plain string: the previous f-string had no placeholders.
        help='Analysis mode (default: hybrid)'
    )

    parser.add_argument(
        '-o', '--output',
        default='./code2flow_output',
        help='Output directory (default: ./code2flow_output)'
    )

    parser.add_argument(
        '-f', '--format',
        default='toon',
        help='Output formats: toon,map,flow,context,yaml,json,mermaid,png,all (default: toon)'
    )

    parser.add_argument(
        '--full',
        action='store_true',
        help='Include all fields in output (including empty/null values)'
    )

    parser.add_argument(
        '--no-patterns',
        action='store_true',
        help='Disable pattern detection'
    )

    parser.add_argument(
        '--max-depth',
        type=int,
        default=10,
        help='Maximum analysis depth (default: 10)'
    )

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Verbose output'
    )

    parser.add_argument(
        '--no-png',
        action='store_true',
        help='Skip automatic PNG generation from Mermaid files'
    )

    parser.add_argument(
        '--strategy',
        choices=['quick', 'standard', 'deep'],
        default='standard',
        help='Analysis strategy: quick (fast overview), standard (balanced), deep (complete)'
    )

    parser.add_argument(
        '--streaming',
        action='store_true',
        help='Use streaming analysis with progress reporting'
    )

    parser.add_argument(
        '--incremental',
        action='store_true',
        help='Only analyze changed files (requires previous run)'
    )

    parser.add_argument(
        '--max-memory',
        type=int,
        default=1000,
        help='Max memory in MB (default: 1000)'
    )

    parser.add_argument(
        '--split-output',
        action='store_true',
        help='Split YAML output into multiple files (summary, functions, classes, modules, entry_points)'
    )

    parser.add_argument(
        '--separate-orphans',
        action='store_true',
        help='Separate consolidated project from orphaned/isolated functions into different folders'
    )

    parser.add_argument(
        '--data-flow',
        action='store_true',
        help='Export data flow analysis (pipelines, state patterns, dependencies, events)'
    )

    parser.add_argument(
        '--data-structures',
        action='store_true',
        help='Export data structure analysis (types, flows, optimization opportunities)'
    )

    parser.add_argument(
        '--refactor',
        action='store_true',
        help='Enable AI-driven refactoring analysis and prompt generation'
    )

    parser.add_argument(
        '--smell',
        help='Filter refactoring by specific code smell (e.g., god_function, feature_envy)'
    )

    parser.add_argument(
        '--llm-format',
        choices=['claude', 'gpt', 'markdown'],
        default='markdown',
        help='Format for refactoring prompts (default: markdown)'
    )

    return parser
176
+
177
+
178
def main():
    """Main CLI entry point.

    Dispatches the ``llm-flow`` / ``llm-context`` subcommands, otherwise
    analyzes ``args.source`` and exports the requested formats into the
    output directory. Exits with status 1 on analysis/export failure and
    2 on missing arguments; returns 0 on success.
    """
    # Handle special subcommands before argparse sees argv, since the
    # main parser treats the first token as a source path.
    if len(sys.argv) > 1 and sys.argv[1] == 'llm-flow':
        from .llm_flow_generator import main as llm_flow_main
        return llm_flow_main(sys.argv[2:])

    if len(sys.argv) > 1 and sys.argv[1] == 'llm-context':
        # Quick LLM context generation
        return generate_llm_context(sys.argv[2:])

    # For all other cases, use the regular parser
    parser = create_parser()
    args = parser.parse_args()

    # Handle analysis (default behavior)
    if not args.source:
        print("Error: missing required argument: source", file=sys.stderr)
        print("Usage: code2flow <source> [options]", file=sys.stderr)
        print(" or: code2flow llm-flow [options]", file=sys.stderr)
        sys.exit(2)

    # Validate source path
    source_path = Path(args.source)
    if not source_path.exists():
        print(f"Error: Source path not found: {source_path}", file=sys.stderr)
        sys.exit(1)

    # Create output directory
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Configure analysis; pattern detection toggles share one flag.
    config = Config(
        mode=args.mode,
        max_depth_enumeration=args.max_depth,
        detect_state_machines=not args.no_patterns,
        detect_recursion=not args.no_patterns,
        output_dir=str(output_dir)
    )

    if args.verbose:
        print(f"Analyzing: {source_path}")
        print(f"Mode: {args.mode}")
        print(f"Output: {output_dir}")

    # Run analysis
    try:
        if args.streaming or args.strategy in ['quick', 'deep']:
            # Use optimized streaming analyzer
            from .core.streaming_analyzer import (
                StreamingAnalyzer, STRATEGY_QUICK,
                STRATEGY_STANDARD, STRATEGY_DEEP
            )

            strategy_map = {
                'quick': STRATEGY_QUICK,
                'standard': STRATEGY_STANDARD,
                'deep': STRATEGY_DEEP
            }
            strategy = strategy_map.get(args.strategy, STRATEGY_STANDARD)

            # Adjust strategy for memory limit. Guard with max(1, ...) so a
            # --max-memory below 10 MB cannot zero out the file budget.
            strategy.max_files_in_memory = min(
                strategy.max_files_in_memory,
                max(1, args.max_memory // 10)  # Rough heuristic
            )

            analyzer = StreamingAnalyzer(config, strategy)

            if args.verbose:
                def on_progress(update):
                    pct = update.get('percentage', 0)
                    print(f"\r[{pct:.0f}%] {update.get('message', '')}", end='', flush=True)
                analyzer.set_progress_callback(on_progress)

            print(f"Analyzing with {args.strategy} strategy...")
            for update in analyzer.analyze_streaming(str(source_path)):
                # 'file_complete' updates are ignored: per-file results are
                # not accumulated here (see TODO below).
                if update['type'] == 'complete':
                    if args.verbose:
                        print()  # New line after progress
                    print(f"Completed in {update.get('elapsed_seconds', 0):.1f}s")

            # For streaming, we need to run again to get actual results
            # TODO: Modify streaming to accumulate results properly
            analyzer = ProjectAnalyzer(config)
            result = analyzer.analyze_project(str(source_path))

        else:
            # Use standard analyzer
            analyzer = ProjectAnalyzer(config)
            result = analyzer.analyze_project(str(source_path))

        if args.verbose:
            print("\nAnalysis complete:")
            print(f" - Functions: {len(result.functions)}")
            print(f" - Classes: {len(result.classes)}")
            print(f" - CFG nodes: {len(result.nodes)}")
            print(f" - CFG edges: {len(result.edges)}")

    except Exception as e:
        print(f"Error during analysis: {e}", file=sys.stderr)
        sys.exit(1)

    # Export results
    formats = [f.strip() for f in args.format.split(',')]

    # Handle 'all' format
    if 'all' in formats:
        formats = ['toon', 'map', 'flow', 'context', 'yaml', 'json', 'mermaid', 'png']

    try:
        if 'toon' in formats:
            exporter = ToonExporter()
            filepath = output_dir / 'analysis.toon'
            exporter.export(result, str(filepath))
            if args.verbose:
                print(f" - TOON (diagnostics): {filepath}")

        if 'map' in formats:
            exporter = MapExporter()
            filepath = output_dir / 'map.toon'
            exporter.export(result, str(filepath))
            if args.verbose:
                print(f" - MAP (structure): {filepath}")

        if 'flow' in formats:
            exporter = FlowExporter()
            filepath = output_dir / 'flow.toon'
            exporter.export(result, str(filepath))
            if args.verbose:
                print(f" - FLOW (data-flow): {filepath}")

        if 'context' in formats:
            exporter = ContextExporter()
            filepath = output_dir / 'context.md'
            exporter.export(result, str(filepath))
            if args.verbose:
                print(f" - CONTEXT (LLM narrative): {filepath}")

        if 'yaml' in formats:
            exporter = YAMLExporter()
            if args.separate_orphans:
                # Create separated output (consolidated vs orphans)
                sep_dir = output_dir / 'separated'
                exporter.export_separated(result, str(sep_dir), compact=True)
                if args.verbose:
                    print(f" - YAML (separated): {sep_dir}/")
            elif args.split_output:
                # Create split output for large projects
                split_dir = output_dir / 'split'
                exporter.export_split(result, str(split_dir), include_defaults=args.full)
                if args.verbose:
                    print(f" - YAML (split): {split_dir}/")
            else:
                filepath = output_dir / 'analysis.yaml'
                exporter.export(result, str(filepath), include_defaults=args.full)
                if args.verbose:
                    print(f" - YAML: {filepath}")

        if 'json' in formats:
            exporter = JSONExporter()
            filepath = output_dir / 'analysis.json'
            exporter.export(result, str(filepath), include_defaults=args.full)
            if args.verbose:
                print(f" - JSON: {filepath}")

        if 'mermaid' in formats:
            exporter = MermaidExporter()
            filepath = output_dir / 'flow.mmd'
            exporter.export(result, str(filepath))
            filepath = output_dir / 'calls.mmd'
            exporter.export_call_graph(result, str(filepath))
            filepath = output_dir / 'compact_flow.mmd'
            exporter.export_compact(result, str(filepath))
            if args.verbose:
                print(f" - Mermaid: {output_dir / '*.mmd'}")

            # Auto-generate PNG from Mermaid files (unless disabled)
            if not args.no_png:
                try:
                    from .mermaid_generator import generate_pngs
                    png_count = generate_pngs(output_dir, output_dir)
                    if args.verbose and png_count > 0:
                        print(f" - PNG: {png_count} files generated")
                except ImportError:
                    # Fallback to external script
                    try:
                        import subprocess
                        script_path = Path(__file__).parent.parent / 'mermaid_to_png.py'
                        if script_path.exists():
                            # BUGFIX: bind to 'proc', not 'result' — the old
                            # code clobbered the analysis result that the
                            # remaining export steps below still use.
                            # sys.executable runs the same interpreter rather
                            # than whatever 'python' is on PATH.
                            proc = subprocess.run([
                                sys.executable, str(script_path),
                                '--batch', str(output_dir), str(output_dir)
                            ], capture_output=True, text=True, timeout=60)
                            if proc.returncode == 0 and args.verbose:
                                print(f" - PNG: {output_dir / '*.png'}")
                    except Exception:
                        # Best-effort: PNG generation is optional tooling.
                        if args.verbose:
                            print(" - PNG: Skipped (install with: make install-mermaid)")
            elif args.verbose:
                print(" - PNG: Skipped (--no-png)")

        if 'png' in formats:
            visualizer = GraphVisualizer(result)
            filepath = output_dir / 'cfg.png'
            visualizer.visualize_cfg(str(filepath))
            filepath = output_dir / 'call_graph.png'
            visualizer.visualize_call_graph(str(filepath))
            if args.verbose:
                print(f" - PNG: {output_dir / '*.png'}")

        if args.data_structures:
            exporter = YAMLExporter()
            struct_path = output_dir / 'data_structures.yaml'
            exporter.export_data_structures(result, str(struct_path), compact=True)
            if args.verbose:
                print(f" - Data structures: {struct_path}")

        # Generate LLM context (backward compat: always generate context.md)
        if 'context' not in formats:
            exporter = ContextExporter()
            filepath = output_dir / 'context.md'
            exporter.export(result, str(filepath))
            if args.verbose:
                print(f" - CONTEXT (LLM narrative): {filepath}")

        # AI-driven refactoring prompts
        if args.refactor:
            from .refactor.prompt_engine import PromptEngine
            prompt_engine = PromptEngine(result)
            prompts = prompt_engine.generate_prompts()

            if prompts:
                prompts_dir = output_dir / 'prompts'
                prompts_dir.mkdir(parents=True, exist_ok=True)

                # Filter by smell if requested
                if args.smell:
                    prompts = {k: v for k, v in prompts.items() if args.smell in k.lower()}

                for filename, content in prompts.items():
                    prompt_path = prompts_dir / filename
                    prompt_path.write_text(content)

                if args.verbose:
                    print(f" - Refactoring prompts: {prompts_dir}/ ({len(prompts)} files)")
            else:
                if args.verbose:
                    print(" - Refactoring: No code smells detected.")

    except Exception as e:
        print(f"Error during export: {e}", file=sys.stderr)
        sys.exit(1)

    if args.verbose:
        print(f"\nAll outputs saved to: {output_dir}")

    return 0
446
+
447
+
448
def generate_llm_context(args_list):
    """Quick command to generate LLM context only.

    Args:
        args_list: Argument strings after the ``llm-context`` subcommand.

    Returns:
        int: 0 on success, 1 if the source path does not exist.
    """
    # argparse and Path are imported at module level; no local re-import needed.
    parser = argparse.ArgumentParser(
        prog='code2flow llm-context',
        description='Generate LLM-friendly context for a project'
    )
    parser.add_argument('source', help='Path to Python project')
    parser.add_argument('-o', '--output', default='./llm_context.md', help='Output file path')
    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose output')

    args = parser.parse_args(args_list)

    # Imported lazily to avoid a circular import at module load time.
    from . import ProjectAnalyzer, FAST_CONFIG
    from .exporters import ContextExporter

    source_path = Path(args.source)
    if not source_path.exists():
        print(f"Error: Source path not found: {source_path}", file=sys.stderr)
        return 1

    if args.verbose:
        print(f"Generating LLM context for: {source_path}")

    # Use fast config with parallel disabled for stability.
    # NOTE(review): this mutates the shared module-level FAST_CONFIG object,
    # affecting any later user of it in this process — confirm intended.
    FAST_CONFIG.performance.parallel_enabled = False

    analyzer = ProjectAnalyzer(FAST_CONFIG)
    result = analyzer.analyze_project(str(source_path))

    exporter = ContextExporter()
    exporter.export(result, args.output)

    # Print summary
    print(f"\n✓ LLM context generated: {args.output}")
    print(f" Functions: {len(result.functions)}")
    print(f" Classes: {len(result.classes)}")
    print(f" Modules: {len(result.modules)}")

    return 0
490
+
491
+
492
# Script entry: propagate main()'s return code as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
@@ -0,0 +1,36 @@
1
+ """Core analysis components for code2flow."""
2
+
3
+ from .analyzer import ProjectAnalyzer, FileCache, FastFileFilter
4
+ from .streaming_analyzer import (
5
+ StreamingAnalyzer,
6
+ IncrementalAnalyzer,
7
+ ScanStrategy,
8
+ SmartPrioritizer,
9
+ STRATEGY_QUICK,
10
+ STRATEGY_STANDARD,
11
+ STRATEGY_DEEP
12
+ )
13
+ from .config import Config, FAST_CONFIG, PerformanceConfig, FilterConfig
14
+ from .models import (
15
+ AnalysisResult, FlowNode, FlowEdge,
16
+ FunctionInfo, ClassInfo, ModuleInfo, Pattern
17
+ )
18
+
19
+ __all__ = [
20
+ 'ProjectAnalyzer',
21
+ 'StreamingAnalyzer',
22
+ 'IncrementalAnalyzer',
23
+ 'ScanStrategy',
24
+ 'SmartPrioritizer',
25
+ 'STRATEGY_QUICK',
26
+ 'STRATEGY_STANDARD',
27
+ 'STRATEGY_DEEP',
28
+ 'FileCache',
29
+ 'FastFileFilter',
30
+ 'Config',
31
+ 'FAST_CONFIG',
32
+ 'AnalysisResult',
33
+ 'FunctionInfo',
34
+ 'ClassInfo',
35
+ 'ModuleInfo',
36
+ ]