code2flow-toon 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +17 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/smells.py +192 -0
  10. code2flow/cli.py +464 -0
  11. code2flow/core/__init__.py +36 -0
  12. code2flow/core/analyzer.py +765 -0
  13. code2flow/core/config.py +177 -0
  14. code2flow/core/models.py +194 -0
  15. code2flow/core/streaming_analyzer.py +666 -0
  16. code2flow/exporters/__init__.py +17 -0
  17. code2flow/exporters/base.py +13 -0
  18. code2flow/exporters/json_exporter.py +17 -0
  19. code2flow/exporters/llm_exporter.py +199 -0
  20. code2flow/exporters/mermaid_exporter.py +67 -0
  21. code2flow/exporters/toon.py +401 -0
  22. code2flow/exporters/yaml_exporter.py +108 -0
  23. code2flow/llm_flow_generator.py +451 -0
  24. code2flow/llm_task_generator.py +263 -0
  25. code2flow/mermaid_generator.py +481 -0
  26. code2flow/nlp/__init__.py +23 -0
  27. code2flow/nlp/config.py +174 -0
  28. code2flow/nlp/entity_resolution.py +326 -0
  29. code2flow/nlp/intent_matching.py +297 -0
  30. code2flow/nlp/normalization.py +122 -0
  31. code2flow/nlp/pipeline.py +388 -0
  32. code2flow/patterns/__init__.py +0 -0
  33. code2flow/patterns/detector.py +168 -0
  34. code2flow/refactor/__init__.py +0 -0
  35. code2flow/refactor/prompt_engine.py +150 -0
  36. code2flow/visualizers/__init__.py +0 -0
  37. code2flow/visualizers/graph.py +196 -0
  38. code2flow_toon-0.2.4.dist-info/METADATA +599 -0
  39. code2flow_toon-0.2.4.dist-info/RECORD +43 -0
  40. code2flow_toon-0.2.4.dist-info/WHEEL +5 -0
  41. code2flow_toon-0.2.4.dist-info/entry_points.txt +2 -0
  42. code2flow_toon-0.2.4.dist-info/licenses/LICENSE +201 -0
  43. code2flow_toon-0.2.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,666 @@
1
+ """Optimized streaming analyzer with prioritization and progress tracking.
2
+
3
+ Key optimizations:
4
+ 1. Lazy CFG generation - only build when needed
5
+ 2. Memory-bounded analysis with streaming output
6
+ 3. Smart file prioritization (entry points, public API first)
7
+ 4. Incremental analysis with change detection
8
+ 5. Progress reporting with ETA
9
+ """
10
+
11
+ import ast
12
+ import hashlib
13
+ import json
14
+ import os
15
+ import time
16
+ from collections import defaultdict
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+ from typing import Dict, List, Optional, Set, Tuple, Iterator
20
+ import fnmatch
21
+
22
+ from .config import Config, FAST_CONFIG
23
+ from .models import (
24
+ AnalysisResult, ClassInfo, FlowEdge, FlowNode,
25
+ FunctionInfo, ModuleInfo, Pattern
26
+ )
27
+
28
+
29
@dataclass
class FilePriority:
    """Priority scoring for file analysis order.

    Produced by SmartPrioritizer.prioritize_files(); a higher
    priority_score means the file is analyzed earlier.
    """
    file_path: str          # path to the .py file as collected
    module_name: str        # dotted module name derived from the path
    priority_score: float   # aggregate score; lists are sorted on this, descending
    reasons: List[str] = field(default_factory=list)  # human-readable tags explaining the score

    # Priority factors (the raw inputs that contributed to priority_score)
    is_entry_point: bool = False  # not imported by any other scanned module
    is_public_api: bool = False   # no dotted-name component starts with '_'
    has_main: bool = False        # file contains an `if __name__ == "__main__"` guard
    import_count: int = 0         # how many scanned modules import this one
    lines_of_code: int = 0        # raw line count (0 if the file was unreadable)
43
+
44
+
45
@dataclass
class ScanStrategy:
    """Scanning methodology configuration.

    Bundles which analysis phases run, memory ceilings, prioritization
    switches, and output behavior. Predefined instances: STRATEGY_QUICK,
    STRATEGY_STANDARD, STRATEGY_DEEP.
    """
    name: str         # short identifier, e.g. "quick" / "standard" / "deep"
    description: str  # human-readable summary of the trade-off

    # Analysis phases
    phase_1_quick_scan: bool = True  # Only functions/classes, no CFG
    phase_2_call_graph: bool = True  # Build call relationships
    phase_3_deep_analysis: bool = False  # Full CFG only for important files
    phase_4_patterns: bool = False  # Pattern detection

    # Memory limits
    max_files_in_memory: int = 100       # LRU cache capacity (StreamingFileCache)
    max_nodes_per_function: int = 50     # per-function CFG node budget
    max_total_nodes: int = 10000         # hard cap across one file's deep analysis

    # Prioritization
    prioritize_entry_points: bool = True
    prioritize_public_api: bool = True
    skip_private_functions: bool = True  # ignore functions whose name starts with '_'
    skip_test_files: bool = True         # filter out test/conftest files during collection

    # Output
    streaming_output: bool = True
    incremental_save: bool = True
71
+
72
+
73
# Predefined strategies.
# Fields not passed explicitly fall back to the ScanStrategy defaults.

# Fastest: signatures only, no CFG, no pattern detection; larger cache
# because per-file state is tiny.
STRATEGY_QUICK = ScanStrategy(
    name="quick",
    description="Fast overview - functions/classes only, no CFG",
    phase_1_quick_scan=True,
    phase_2_call_graph=True,
    phase_3_deep_analysis=False,
    phase_4_patterns=False,
    max_files_in_memory=200,
    skip_private_functions=True,
)

# Default: all phases on, but with a reduced per-function node budget (30)
# so deep analysis stays bounded.
STRATEGY_STANDARD = ScanStrategy(
    name="standard",
    description="Balanced analysis with selective CFG",
    phase_1_quick_scan=True,
    phase_2_call_graph=True,
    phase_3_deep_analysis=True,
    phase_4_patterns=True,
    max_files_in_memory=100,
    max_nodes_per_function=30,
    prioritize_entry_points=True,
)

# Most thorough: generous per-function budget (100); smaller in-memory
# cache to compensate for the heavier per-file state.
STRATEGY_DEEP = ScanStrategy(
    name="deep",
    description="Complete analysis with full CFG for all files",
    phase_1_quick_scan=True,
    phase_2_call_graph=True,
    phase_3_deep_analysis=True,
    phase_4_patterns=True,
    max_files_in_memory=50,
    max_nodes_per_function=100,
    prioritize_entry_points=True,
)
108
+
109
+
110
class StreamingFileCache:
    """Memory-efficient parsed-file cache with LRU eviction.

    Keys combine the file stem with a content hash, so a file whose content
    changes gets a fresh entry. At most ``max_size`` entries are held in
    memory; the least recently used entry is evicted first.

    Bug fixes vs. the previous revision:
    - ``put()`` used to append the key to ``_access_order`` unconditionally,
      creating duplicates; after a ``get()`` refresh, eviction could then pop
      a stale duplicate and delete the *most* recently used entry.
    - ``_evict_if_needed()`` could loop forever if ``_access_order`` was
      empty while ``_memory_cache`` was not.
    """

    def __init__(self, max_size: int = 100, cache_dir: str = ".code2flow_cache"):
        self.max_size = max_size
        # NOTE(review): cache_dir is created but never written to by this
        # class -- presumably reserved for on-disk spillover; confirm.
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self._memory_cache: Dict[str, tuple] = {}
        # Keys ordered least -> most recently used; kept duplicate-free.
        self._access_order: List[str] = []

    def _get_cache_key(self, file_path: str, content: str) -> str:
        """Generate cache key from the file stem and a truncated content MD5."""
        content_hash = hashlib.md5(content.encode()).hexdigest()[:16]
        return f"{Path(file_path).stem}_{content_hash}"

    def _evict_if_needed(self) -> None:
        """Evict least-recently-used entries until there is room for one more."""
        while len(self._memory_cache) >= self.max_size:
            if not self._access_order:
                # Defensive: never spin if bookkeeping ever desyncs.
                break
            oldest = self._access_order.pop(0)
            if oldest in self._memory_cache:
                del self._memory_cache[oldest]

    def get(self, file_path: str, content: str) -> Optional[Tuple[ast.AST, str]]:
        """Return the cached entry (marking it most recently used) or None."""
        key = self._get_cache_key(file_path, content)

        if key in self._memory_cache:
            # Move to the most-recently-used end.
            if key in self._access_order:
                self._access_order.remove(key)
            self._access_order.append(key)
            return self._memory_cache[key]

        return None

    def put(self, file_path: str, content: str, data: Tuple[ast.AST, str]) -> None:
        """Store ``data`` under the (file, content) key with LRU management."""
        key = self._get_cache_key(file_path, content)

        if key in self._memory_cache:
            # Refresh an existing entry: update recency without evicting
            # anything and without duplicating the key in the order list.
            self._access_order.remove(key)
        else:
            self._evict_if_needed()

        self._memory_cache[key] = data
        self._access_order.append(key)
153
+
154
+
155
class SmartPrioritizer:
    """Smart file prioritization for optimal analysis order.

    Scores each file on: presence of a ``__main__`` guard (+100), entry-point
    status (+50), public-API naming (+20), +5 per importing module, and +10
    for small files, then sorts descending so the most informative files are
    analyzed first.

    Fix vs. the previous revision: the three bare ``except:`` clauses (which
    also swallowed KeyboardInterrupt/SystemExit) are narrowed to the I/O and
    parse errors that can actually occur here.
    """

    def __init__(self, strategy: "ScanStrategy"):
        # The strategy is stored for callers; scoring weights here are fixed.
        self.strategy = strategy

    def prioritize_files(
        self,
        files: List[Tuple[str, str]],
        project_path: Path
    ) -> "List[FilePriority]":
        """Score and sort files by importance.

        Args:
            files: (file_path, module_name) pairs to score.
            project_path: project root (currently unused; kept for API symmetry).

        Returns:
            FilePriority entries sorted by descending priority_score.
        """
        scored = []

        # First pass: gather who-imports-whom relationships.
        import_graph = self._build_import_graph(files, project_path)

        for file_path, module_name in files:
            score = 0.0
            reasons = []

            # A __main__ guard is the strongest entry-point signal.
            has_main = self._check_has_main(file_path)
            if has_main:
                score += 100.0
                reasons.append("has_main")

            # Entry point: nothing else in the scan imports this module.
            # NOTE(review): import_graph keys are *top-level* module names
            # while module_name may be dotted, so dotted submodules always
            # look like entry points -- confirm this is intended.
            is_entry = module_name not in import_graph or len(import_graph[module_name]) == 0
            if is_entry:
                score += 50.0
                reasons.append("entry_point")

            # Public API: no dotted-name component starts with an underscore.
            is_public = not any(part.startswith('_') for part in module_name.split('.'))
            if is_public:
                score += 20.0
                reasons.append("public_api")

            # Widely-imported modules are more central to the codebase.
            import_count = len(import_graph.get(module_name, []))
            score += import_count * 5.0

            # Prefer smaller files first for quick wins.
            try:
                loc = len(Path(file_path).read_text().split('\n'))
                if loc < 100:
                    score += 10.0
                    reasons.append("small_file")
            except (OSError, UnicodeDecodeError):
                # Unreadable file: size unknown, no size bonus.
                loc = 0

            priority = FilePriority(
                file_path=file_path,
                module_name=module_name,
                priority_score=score,
                reasons=reasons,
                is_entry_point=is_entry,
                is_public_api=is_public,
                has_main=has_main,
                import_count=import_count,
                lines_of_code=loc
            )
            scored.append(priority)

        # Sort by score descending.
        scored.sort(key=lambda x: x.priority_score, reverse=True)
        return scored

    def _build_import_graph(
        self,
        files: List[Tuple[str, str]],
        project_path: Path
    ) -> Dict[str, Set[str]]:
        """Build an "imported by" graph.

        Maps the *top-level* name of each imported module to the set of
        scanned modules that import it. Unreadable or unparsable files are
        skipped silently (best-effort analysis).
        """
        imported_by: Dict[str, Set[str]] = defaultdict(set)

        for file_path, module_name in files:
            try:
                content = Path(file_path).read_text()
                tree = ast.parse(content)
            except (OSError, UnicodeDecodeError, SyntaxError, ValueError):
                continue

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        # Simplified - just record the top-level module.
                        top_module = alias.name.split('.')[0]
                        imported_by[top_module].add(module_name)

                elif isinstance(node, ast.ImportFrom):
                    # node.module is None for relative `from . import x`.
                    if node.module:
                        top_module = node.module.split('.')[0]
                        imported_by[top_module].add(module_name)

        return imported_by

    def _check_has_main(self, file_path: str) -> bool:
        """Check if file has an ``if __name__ == "__main__"`` block.

        Text-level heuristic (no parsing); unreadable files count as False.
        """
        try:
            content = Path(file_path).read_text()
            return 'if __name__' in content and '__main__' in content
        except (OSError, UnicodeDecodeError):
            return False
261
+
262
+
263
class StreamingAnalyzer:
    """Memory-efficient streaming analyzer with progress tracking.

    Runs a phased pipeline (collect/prioritize -> quick scan -> call graph ->
    selective deep analysis) and yields partial results as plain dicts so
    callers can render progress incrementally. Cooperative cancellation is
    supported via cancel(); progress reporting via set_progress_callback().
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        strategy: Optional[ScanStrategy] = None
    ):
        # Defaults favor speed: FAST_CONFIG + the "standard" strategy.
        self.config = config or FAST_CONFIG
        self.strategy = strategy or STRATEGY_STANDARD

        # Streaming cache with memory bounds; disabled entirely when the
        # config turns caching off.
        self.cache = StreamingFileCache(
            max_size=self.strategy.max_files_in_memory,
            cache_dir=self.config.performance.cache_dir
        ) if self.config.performance.enable_cache else None

        self.prioritizer = SmartPrioritizer(self.strategy)

        # Progress tracking
        self._progress_callback: Optional[callable] = None
        self._cancelled = False

    def set_progress_callback(self, callback: callable) -> None:
        """Set callback for progress updates (receives the dict built in
        _report_progress)."""
        self._progress_callback = callback

    def cancel(self) -> None:
        """Request cancellation; the analysis loops check this flag between
        files and stop at the next opportunity."""
        self._cancelled = True

    def analyze_streaming(
        self,
        project_path: str,
        output_callback: Optional[callable] = None
    ) -> Iterator[Dict]:
        """Analyze project with streaming output (yields partial results).

        Yields dicts tagged by 'type': 'file_complete' per quick-scanned
        file, 'call_graph_complete' once, 'deep_complete' per deeply
        analyzed file, and a final 'complete' summary.

        NOTE(review): output_callback is accepted but never used -- confirm
        whether it was superseded by the generator protocol.
        """
        start_time = time.time()
        project_path = Path(project_path).resolve()

        # Phase 1: Collect and prioritize files
        raw_files = self._collect_files(project_path)
        prioritized = self.prioritizer.prioritize_files(raw_files, project_path)

        total_files = len(prioritized)
        processed = 0

        self._report_progress(
            phase="collect",
            current=0,
            total=total_files,
            message=f"Found {total_files} files to analyze"
        )

        # Phase 2: Quick scan (functions/classes only)
        # NOTE(review): files that fail to read/parse are skipped without
        # incrementing `processed`, so 'progress' may never reach 1.0.
        quick_results = []
        for priority in prioritized:
            if self._cancelled:
                break

            result = self._quick_scan_file(priority)
            if result:
                quick_results.append(result)
                processed += 1

                # Yield incremental result
                yield {
                    'type': 'file_complete',
                    'file': priority.file_path,
                    'priority': priority.priority_score,
                    'functions': len(result.get('functions', {})),
                    'classes': len(result.get('classes', {})),
                    'progress': processed / total_files,
                    'eta_seconds': self._estimate_eta(start_time, processed, total_files)
                }

                self._report_progress(
                    phase="quick_scan",
                    current=processed,
                    total=total_files,
                    message=f"Scanned {priority.module_name} (priority: {priority.priority_score:.1f})"
                )

        # Phase 3: Build call graph (memory efficient)
        if self.strategy.phase_2_call_graph and not self._cancelled:
            call_graph = self._build_call_graph_streaming(quick_results)

            yield {
                'type': 'call_graph_complete',
                'functions': len(call_graph),
                'edges': sum(len(calls) for calls in call_graph.values())
            }

        # Phase 4: Deep analysis for important files (selective CFG)
        if self.strategy.phase_3_deep_analysis and not self._cancelled:
            important_files = self._select_important_files(prioritized, quick_results)

            deep_processed = 0
            for priority in important_files[:50]:  # Limit to top 50
                if self._cancelled:
                    break

                result = self._deep_analyze_file(priority)
                if result:
                    deep_processed += 1
                    yield {
                        'type': 'deep_complete',
                        'file': priority.file_path,
                        'nodes': len(result.get('nodes', {})),
                        # NOTE(review): denominator is the full candidate
                        # list even though the loop caps at 50, so this can
                        # underreport progress -- confirm intended.
                        'progress': deep_processed / len(important_files)
                    }

        # Final summary
        yield {
            'type': 'complete',
            'total_files': total_files,
            'processed_files': processed,
            'elapsed_seconds': time.time() - start_time
        }

    def _quick_scan_file(self, priority: FilePriority) -> Optional[Dict]:
        """Quick scan - extract functions and classes only (no CFG).

        Returns a result dict with 'module', 'functions', 'classes',
        'nodes', 'edges' keys, or None if the file is unreadable or has a
        syntax error.
        """
        try:
            content = Path(priority.file_path).read_text(encoding='utf-8', errors='ignore')
        except Exception:
            return None

        # Try cache (keyed on path + content hash, so stale entries miss).
        if self.cache:
            cached = self.cache.get(priority.file_path, content)
            if cached:
                tree, _ = cached
            else:
                try:
                    tree = ast.parse(content)
                    self.cache.put(priority.file_path, content, (tree, content))
                except SyntaxError:
                    return None
        else:
            try:
                tree = ast.parse(content)
            except SyntaxError:
                return None

        result = {
            'module': ModuleInfo(
                name=priority.module_name,
                file=priority.file_path
            ),
            'functions': {},
            'classes': {},
            'nodes': {},
            'edges': []
        }

        # NOTE(review): `lines` is computed but never used below -- possibly
        # a leftover from a line-extraction feature.
        lines = content.split('\n')

        # ast.walk visits nested defs too, so methods are also recorded,
        # qualified only by module (not by class).
        for node in ast.walk(tree):
            if isinstance(node, ast.ClassDef):
                cls_info = ClassInfo(
                    name=node.name,
                    qualified_name=f"{priority.module_name}.{node.name}",
                    file=priority.file_path,
                    line=node.lineno,
                    module=priority.module_name
                )
                result['classes'][cls_info.qualified_name] = cls_info
                result['module'].classes.append(cls_info.qualified_name)

            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                # Skip private if configured
                if self.strategy.skip_private_functions and node.name.startswith('_'):
                    continue

                func_info = FunctionInfo(
                    name=node.name,
                    qualified_name=f"{priority.module_name}.{node.name}",
                    file=priority.file_path,
                    line=node.lineno,
                    module=priority.module_name
                )

                # Extract calls (lightweight): only bare-name calls `f(...)`;
                # attribute calls `obj.f(...)` are intentionally ignored.
                for child in ast.walk(node):
                    if isinstance(child, ast.Call):
                        if isinstance(child.func, ast.Name):
                            func_info.calls.append(child.func.id)

                result['functions'][func_info.qualified_name] = func_info
                result['module'].functions.append(func_info.qualified_name)

        return result

    def _deep_analyze_file(self, priority: FilePriority) -> Optional[Dict]:
        """Deep analysis with limited CFG generation.

        Builds only a skeletal entry/exit node pair per function; returns
        None if the underlying quick scan fails.
        """
        result = self._quick_scan_file(priority)
        if not result:
            return None

        # NOTE(review): max_nodes is assigned but not used below; only the
        # strategy's max_total_nodes cap is enforced -- confirm intended.
        max_nodes = self.strategy.max_nodes_per_function

        for func_name, func in result['functions'].items():
            # Skip if too many calls (simplistic heuristic)
            if len(func.calls) > 20:
                continue

            # Build simplified CFG: just ENTRY and EXIT markers per function.
            entry_id = f"{func_name}_entry"
            exit_id = f"{func_name}_exit"

            result['nodes'][entry_id] = FlowNode(
                id=entry_id, type='ENTRY', label='entry', function=func_name
            )
            result['nodes'][exit_id] = FlowNode(
                id=exit_id, type='EXIT', label='exit', function=func_name
            )

            # Limit total nodes
            if len(result['nodes']) > self.strategy.max_total_nodes:
                break

        return result

    def _build_call_graph_streaming(self, results: List[Dict]) -> Dict[str, List[str]]:
        """Memory-efficient call graph construction.

        Resolves each recorded bare-name call to a known qualified function
        name (first match wins). Mutates func.calls in place with the
        resolved names and returns {qualified_name: [resolved callees]}.
        """
        call_graph = {}

        # Build function name lookup across all scanned files.
        all_functions = {}
        for r in results:
            all_functions.update(r.get('functions', {}))

        # Resolve calls.
        # NOTE(review): this is O(calls x functions); a suffix index would
        # make it linear if it ever shows up in profiles.
        for r in results:
            for func_name, func in r.get('functions', {}).items():
                resolved_calls = []
                for called in func.calls:
                    # Try to resolve to known function
                    for known_name in all_functions:
                        if known_name.endswith(f".{called}") or known_name == called:
                            resolved_calls.append(known_name)
                            break

                func.calls = resolved_calls
                call_graph[func_name] = resolved_calls

        return call_graph

    def _select_important_files(
        self,
        prioritized: List[FilePriority],
        results: List[Dict]
    ) -> List[FilePriority]:
        """Select files for deep analysis based on importance.

        A file qualifies if it is an entry point, its module defines more
        than 5 functions, or it is imported by more than 3 modules.
        """
        important = []

        for p in prioritized:
            # Entry points are important
            if p.is_entry_point:
                important.append(p)
                continue

            # Find result for this file
            for r in results:
                mod = r.get('module')
                if mod and mod.name == p.module_name:
                    # Files with many functions are important
                    if len(mod.functions) > 5:
                        important.append(p)
                    break

                # Files called by many others
                # NOTE(review): this check does not depend on `r`, yet sits
                # inside the results loop and breaks after the first
                # non-matching result -- the outcome is order-dependent;
                # consider hoisting it out of the loop.
                if p.import_count > 3:
                    important.append(p)
                    break

        return important

    def _collect_files(self, project_path: Path) -> List[Tuple[str, str]]:
        """Collect Python files with filtering.

        Returns (file_path, module_name) pairs; module names are dotted
        paths relative to project_path, with __init__.py mapping to its
        package name.
        """
        files = []

        for py_file in project_path.rglob("*.py"):
            file_str = str(py_file)

            # Apply filters
            # NOTE(review): substring match on 'test' also excludes files
            # like latest.py or contest.py -- confirm acceptable.
            if self.strategy.skip_test_files:
                if any(x in file_str.lower() for x in ['test', '_test', 'conftest']):
                    continue

            if any(x in file_str.lower() for x in ['__pycache__', '.venv', 'venv']):
                continue

            # Calculate module name
            rel_path = py_file.relative_to(project_path)
            parts = list(rel_path.parts)[:-1]
            if py_file.name == '__init__.py':
                # A top-level __init__.py takes the project directory's name.
                module_name = '.'.join(parts) if parts else project_path.name
            else:
                module_name = '.'.join(parts + [py_file.stem])

            files.append((file_str, module_name))

        return files

    def _estimate_eta(
        self,
        start_time: float,
        processed: int,
        total: int
    ) -> float:
        """Estimate remaining seconds from the average per-file rate so far.

        Returns 0.0 before any file has completed (no rate to extrapolate).
        """
        if processed == 0:
            return 0.0

        elapsed = time.time() - start_time
        rate = processed / elapsed
        remaining = total - processed

        return remaining / rate if rate > 0 else 0.0

    def _report_progress(
        self,
        phase: str,
        current: int,
        total: int,
        message: str
    ) -> None:
        """Report progress via the registered callback (no-op if unset)."""
        if self._progress_callback:
            self._progress_callback({
                'phase': phase,
                'current': current,
                'total': total,
                'percentage': (current / total * 100) if total > 0 else 0,
                'message': message
            })
602
+
603
class IncrementalAnalyzer:
    """Incremental analysis with change detection.

    Persists one MD5 hash per file in ``.code2flow_state.json`` (in the
    current working directory) and, on the next run, partitions the
    project's .py files into changed vs unchanged sets.

    Fix vs. the previous revision: the bare ``except:`` clauses are
    narrowed to the I/O and JSON errors that can actually occur.
    """

    def __init__(self, config: "Optional[Config]" = None):
        self.config = config or FAST_CONFIG
        # NOTE(review): state lives in the CWD, not the project root, so
        # analyzing two projects from the same directory shares one state
        # file -- confirm intended.
        self.state_file = Path(".code2flow_state.json")
        self.previous_state: Dict[str, str] = {}
        self._load_state()

    def _load_state(self) -> None:
        """Load previous analysis state (best-effort: corrupt or unreadable
        state degrades to a full re-analysis)."""
        if self.state_file.exists():
            try:
                with open(self.state_file, 'r') as f:
                    data = json.load(f)
                self.previous_state = data.get('file_hashes', {})
            except (OSError, json.JSONDecodeError):
                pass

    def _save_state(self, current_state: Dict[str, str]) -> None:
        """Save current analysis state with a timestamp."""
        with open(self.state_file, 'w') as f:
            json.dump({
                'file_hashes': current_state,
                'timestamp': time.time()
            }, f)

    def get_changed_files(
        self,
        project_path: Path
    ) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
        """Partition the project's .py files into (changed, unchanged) lists.

        Each element is a (file_path, module_name) pair. New files count as
        changed; unreadable files are skipped (and dropped from state). The
        state file is rewritten as a side effect.

        NOTE(review): unlike StreamingAnalyzer._collect_files, no test/venv
        filtering is applied here -- confirm the asymmetry is intended.
        """
        changed = []
        unchanged = []
        current_state = {}

        for py_file in project_path.rglob("*.py"):
            try:
                content = py_file.read_bytes()
            except OSError:
                continue  # unreadable: skip, do not record a hash

            file_hash = hashlib.md5(content).hexdigest()
            file_str = str(py_file)
            current_state[file_str] = file_hash

            module = self._get_module_name(py_file, project_path)
            # Unknown files have no previous hash and thus count as changed.
            if self.previous_state.get(file_str) == file_hash:
                unchanged.append((file_str, module))
            else:
                changed.append((file_str, module))

        self._save_state(current_state)
        return changed, unchanged

    def _get_module_name(self, py_file: Path, project_path: Path) -> str:
        """Calculate the dotted module name of py_file relative to project_path.

        __init__.py maps to its package; a top-level __init__.py maps to the
        project directory's own name.
        """
        rel_path = py_file.relative_to(project_path)
        parts = list(rel_path.parts)[:-1]
        if py_file.name == '__init__.py':
            return '.'.join(parts) if parts else project_path.name
        return '.'.join(parts + [py_file.stem])
@@ -0,0 +1,17 @@
1
+ """Exporters package for code2flow."""
2
+
3
+ from .base import Exporter
4
+ from .json_exporter import JSONExporter
5
+ from .yaml_exporter import YAMLExporter
6
+ from .mermaid_exporter import MermaidExporter
7
+ from .llm_exporter import LLMPromptExporter
8
+ from .toon import ToonExporter
9
+
10
+ __all__ = [
11
+ 'Exporter',
12
+ 'JSONExporter',
13
+ 'YAMLExporter',
14
+ 'MermaidExporter',
15
+ 'LLMPromptExporter',
16
+ 'ToonExporter'
17
+ ]
@@ -0,0 +1,13 @@
1
+ """Base Exporter class for code2flow."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from ..core.models import AnalysisResult
5
+
6
+
7
class Exporter(ABC):
    """Abstract base class for all exporters.

    Concrete subclasses (JSON, YAML, Mermaid, LLM prompt, TOON) implement
    ``export`` to serialize an AnalysisResult to a file.
    """

    @abstractmethod
    def export(self, result: AnalysisResult, output_path: str, **kwargs) -> None:
        """Export analysis result to the specified path.

        Args:
            result: the completed analysis to serialize.
            output_path: destination file path.
            **kwargs: exporter-specific options (format details, filters).
        """
        pass