code2llm 0.5.121__tar.gz → 0.5.123__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. {code2llm-0.5.121 → code2llm-0.5.123}/PKG-INFO +2 -2
  2. {code2llm-0.5.121 → code2llm-0.5.123}/README.md +1 -1
  3. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/__init__.py +1 -1
  4. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli_commands.py +6 -5
  5. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli_exports/orchestrator.py +99 -1
  6. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli_parser.py +12 -0
  7. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/analyzer.py +49 -50
  8. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/config.py +65 -7
  9. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/file_analyzer.py +2 -6
  10. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/gitignore.py +1 -1
  11. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/__init__.py +1 -3
  12. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/base.py +11 -1
  13. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/models.py +0 -1
  14. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/refactoring.py +0 -1
  15. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/generators/mermaid.py +40 -10
  16. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/nlp/__init__.py +1 -1
  17. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm.egg-info/PKG-INFO +2 -2
  18. {code2llm-0.5.121 → code2llm-0.5.123}/pyproject.toml +1 -1
  19. {code2llm-0.5.121 → code2llm-0.5.123}/setup.py +1 -1
  20. {code2llm-0.5.121 → code2llm-0.5.123}/LICENSE +0 -0
  21. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/__main__.py +0 -0
  22. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/__init__.py +0 -0
  23. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/call_graph.py +0 -0
  24. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/cfg.py +0 -0
  25. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/coupling.py +0 -0
  26. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/data_analysis.py +0 -0
  27. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/dfg.py +0 -0
  28. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/pipeline_classifier.py +0 -0
  29. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/pipeline_detector.py +0 -0
  30. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/pipeline_resolver.py +0 -0
  31. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/side_effects.py +0 -0
  32. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/smells.py +0 -0
  33. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/type_inference.py +0 -0
  34. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/utils/__init__.py +0 -0
  35. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/analysis/utils/ast_helpers.py +0 -0
  36. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/api.py +0 -0
  37. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli.py +0 -0
  38. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli_analysis.py +0 -0
  39. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli_exports/__init__.py +0 -0
  40. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli_exports/code2logic.py +0 -0
  41. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli_exports/formats.py +0 -0
  42. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/cli_exports/prompt.py +0 -0
  43. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/__init__.py +0 -0
  44. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/ast_registry.py +0 -0
  45. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/export_pipeline.py +0 -0
  46. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/file_cache.py +0 -0
  47. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/file_filter.py +0 -0
  48. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/incremental.py +0 -0
  49. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/cpp.py +0 -0
  50. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/csharp.py +0 -0
  51. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/generic.py +0 -0
  52. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/go_lang.py +0 -0
  53. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/java.py +0 -0
  54. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/php.py +0 -0
  55. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/ruby.py +0 -0
  56. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/rust.py +0 -0
  57. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/ts_extractors.py +0 -0
  58. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/ts_parser.py +0 -0
  59. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/lang/typescript.py +0 -0
  60. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/large_repo.py +0 -0
  61. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/persistent_cache.py +0 -0
  62. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/repo_files.py +0 -0
  63. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/streaming/__init__.py +0 -0
  64. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/streaming/cache.py +0 -0
  65. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/streaming/incremental.py +0 -0
  66. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/streaming/prioritizer.py +0 -0
  67. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/streaming/scanner.py +0 -0
  68. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/streaming/strategies.py +0 -0
  69. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/streaming_analyzer.py +0 -0
  70. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/core/toon_size_manager.py +0 -0
  71. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/__init__.py +0 -0
  72. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/article_view.py +0 -0
  73. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/base.py +0 -0
  74. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/context_exporter.py +0 -0
  75. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/context_view.py +0 -0
  76. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/dashboard_data.py +0 -0
  77. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/dashboard_renderer.py +0 -0
  78. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/evolution_exporter.py +0 -0
  79. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/flow_constants.py +0 -0
  80. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/flow_exporter.py +0 -0
  81. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/flow_renderer.py +0 -0
  82. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/html_dashboard.py +0 -0
  83. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/index_generator/__init__.py +0 -0
  84. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/index_generator/renderer.py +0 -0
  85. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/index_generator/scanner.py +0 -0
  86. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/index_generator.py +0 -0
  87. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/json_exporter.py +0 -0
  88. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/llm_exporter.py +0 -0
  89. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/map_exporter.py +0 -0
  90. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/mermaid_exporter.py +0 -0
  91. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/mermaid_flow_helpers.py +0 -0
  92. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/project_yaml/__init__.py +0 -0
  93. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/project_yaml/constants.py +0 -0
  94. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/project_yaml/core.py +0 -0
  95. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/project_yaml/evolution.py +0 -0
  96. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/project_yaml/health.py +0 -0
  97. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/project_yaml/hotspots.py +0 -0
  98. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/project_yaml/modules.py +0 -0
  99. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/project_yaml_exporter.py +0 -0
  100. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/readme_exporter.py +0 -0
  101. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/report_generators.py +0 -0
  102. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon/__init__.py +0 -0
  103. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon/helpers.py +0 -0
  104. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon/metrics.py +0 -0
  105. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon/metrics_core.py +0 -0
  106. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon/metrics_duplicates.py +0 -0
  107. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon/metrics_health.py +0 -0
  108. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon/module_detail.py +0 -0
  109. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon/renderer.py +0 -0
  110. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon.py +0 -0
  111. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/toon_view.py +0 -0
  112. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/validate_project.py +0 -0
  113. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/exporters/yaml_exporter.py +0 -0
  114. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/generators/__init__.py +0 -0
  115. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/generators/_utils.py +0 -0
  116. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/generators/llm_flow.py +0 -0
  117. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/generators/llm_task.py +0 -0
  118. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/nlp/config.py +0 -0
  119. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/nlp/entity_resolution.py +0 -0
  120. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/nlp/intent_matching.py +0 -0
  121. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/nlp/normalization.py +0 -0
  122. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/nlp/pipeline.py +0 -0
  123. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/patterns/__init__.py +0 -0
  124. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/patterns/detector.py +0 -0
  125. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/refactor/__init__.py +0 -0
  126. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm/refactor/prompt_engine.py +0 -0
  127. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm.egg-info/SOURCES.txt +0 -0
  128. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm.egg-info/dependency_links.txt +0 -0
  129. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm.egg-info/entry_points.txt +0 -0
  130. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm.egg-info/requires.txt +0 -0
  131. {code2llm-0.5.121 → code2llm-0.5.123}/code2llm.egg-info/top_level.txt +0 -0
  132. {code2llm-0.5.121 → code2llm-0.5.123}/setup.cfg +0 -0
  133. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_advanced_analysis.py +0 -0
  134. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_analyzer.py +0 -0
  135. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_calls_toon_export.py +0 -0
  136. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_deep_analysis.py +0 -0
  137. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_edge_cases.py +0 -0
  138. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_flow_exporter.py +0 -0
  139. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_format_quality.py +0 -0
  140. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_multilanguage_e2e.py +0 -0
  141. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_nlp_pipeline.py +0 -0
  142. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_nonpython_cc_calls.py +0 -0
  143. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_persistent_cache.py +0 -0
  144. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_pipeline_detector.py +0 -0
  145. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_project_toon_export.py +0 -0
  146. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_prompt_engine.py +0 -0
  147. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_prompt_txt.py +0 -0
  148. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_refactoring_engine.py +0 -0
  149. {code2llm-0.5.121 → code2llm-0.5.123}/tests/test_toon_v2.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code2llm
3
- Version: 0.5.121
3
+ Version: 0.5.123
4
4
  Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
5
5
  Home-page: https://github.com/wronai/stts
6
6
  Author: STTS Project
@@ -67,7 +67,7 @@ Dynamic: requires-python
67
67
 
68
68
  ## AI Cost Tracking
69
69
 
70
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.121-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
70
+ ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.123-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
71
71
  ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-57.3h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
72
72
 
73
73
  - 🤖 **LLM usage:** $7.5000 (166 commits)
@@ -3,7 +3,7 @@
3
3
 
4
4
  ## AI Cost Tracking
5
5
 
6
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.121-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
6
+ ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.123-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
7
7
  ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-57.3h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
8
8
 
9
9
  - 🤖 **LLM usage:** $7.5000 (166 commits)
@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
8
8
  and entity resolution with multilingual support.
9
9
  """
10
10
 
11
- __version__ = "0.5.121"
11
+ __version__ = "0.5.123"
12
12
  __author__ = "STTS Project"
13
13
 
14
14
  # Core analysis components (lightweight, always needed)
@@ -6,6 +6,7 @@ from pathlib import Path
6
6
  from typing import Optional
7
7
 
8
8
  from .cli_exports import _run_report
9
+ from .core.config import DEFAULT_CACHE_MAX_AGE_DAYS, KB
9
10
 
10
11
 
11
12
  def handle_special_commands() -> Optional[int]:
@@ -39,18 +40,18 @@ def handle_cache_command(args_list) -> int:
39
40
  parser.add_argument('action', choices=['status', 'clear', 'gc'], help='Cache action')
40
41
  parser.add_argument('--all', action='store_true', dest='all_projects',
41
42
  help='Apply to all cached projects (clear only)')
42
- parser.add_argument('--max-age', type=int, default=30, metavar='DAYS',
43
- help='Max age in days for gc (default: 30)')
43
+ parser.add_argument('--max-age', type=int, default=DEFAULT_CACHE_MAX_AGE_DAYS, metavar='DAYS',
44
+ help=f'Max age in days for gc (default: {DEFAULT_CACHE_MAX_AGE_DAYS})')
44
45
  args = parser.parse_args(args_list)
45
46
 
46
47
  if args.action == 'status':
47
48
  projects = get_all_projects()
48
49
  root = _DEFAULT_ROOT
49
- total_mb = sum(p.get('cache_size_bytes', 0) for p in projects) / (1024 * 1024)
50
+ total_mb = sum(p.get('cache_size_bytes', 0) for p in projects) / (KB * KB)
50
51
  print(f"Cache: {root}")
51
52
  print(f" Projects: {len(projects)} Total: {total_mb:.1f} MB")
52
53
  for p in projects:
53
- size_mb = p.get('cache_size_bytes', 0) / (1024 * 1024)
54
+ size_mb = p.get('cache_size_bytes', 0) / (KB * KB)
54
55
  updated = p.get('updated_at', 0)
55
56
  age_min = int((time.time() - updated) / 60) if updated else 0
56
57
  age_str = f"{age_min}m ago" if age_min < 120 else f"{age_min//60}h ago"
@@ -246,7 +247,7 @@ def _get_file_sizes(chunk_dir: Path, required_files: list[str]) -> str:
246
247
  sizes = []
247
248
  for req_file in required_files:
248
249
  size = (chunk_dir / req_file).stat().st_size
249
- sizes.append(f"{req_file}:{size//1024}KB" if size > 1024 else f"{req_file}:{size}B")
250
+ sizes.append(f"{req_file}:{size//KB}KB" if size > KB else f"{req_file}:{size}B")
250
251
  return ", ".join(sizes)
251
252
 
252
253
 
@@ -4,10 +4,18 @@ Refactored to use EXPORT_REGISTRY for core format dispatch.
4
4
  Maintains backward compatibility with all existing --format values.
5
5
  """
6
6
 
7
+ import shutil
7
8
  import sys
8
9
  from pathlib import Path
9
10
  from typing import Optional, List, Dict, Any
10
11
 
12
+ # Optional progress bar support
13
+ try:
14
+ from tqdm import tqdm
15
+ _HAS_TQDM = True
16
+ except ImportError:
17
+ _HAS_TQDM = False
18
+
11
19
  from code2llm.exporters import (
12
20
  get_exporter,
13
21
  EXPORT_REGISTRY,
@@ -17,6 +25,8 @@ from code2llm.exporters import (
17
25
  IndexHTMLGenerator,
18
26
  )
19
27
  from code2llm.exporters.project_yaml.evolution import load_previous_evolution
28
+ from code2llm.core.persistent_cache import PersistentCache
29
+ from code2llm.core.config import DEFAULT_PROGRESS_BAR_THRESHOLD
20
30
 
21
31
 
22
32
  # Format output filenames
@@ -46,26 +56,103 @@ FORMAT_LABELS: Dict[str, str] = {
46
56
  }
47
57
 
48
58
 
59
+ def _build_export_config(args, formats: List[str]) -> Dict[str, Any]:
60
+ """Build config dict for export caching."""
61
+ return {
62
+ 'formats': sorted(formats),
63
+ 'png': getattr(args, 'png', False),
64
+ 'no_png': getattr(args, 'no_png', False),
65
+ 'flow_include_examples': getattr(args, 'flow_include_examples', False),
66
+ 'full': getattr(args, 'full', False),
67
+ 'refactor': getattr(args, 'refactor', False),
68
+ 'data_structures': getattr(args, 'data_structures', False),
69
+ }
70
+
71
+
49
72
  def _run_exports(args, result, output_dir: Path, source_path: Optional[Path] = None):
50
73
  """Export analysis results in requested formats.
51
74
 
52
75
  Uses EXPORT_REGISTRY for core format dispatch.
53
76
  For chunked analysis, exports to subproject subdirectories.
77
+ Supports export-level caching for repeated runs.
54
78
  """
55
79
  requested_formats = [f.strip() for f in args.format.split(',')]
56
80
  formats = _expand_all_formats(requested_formats, getattr(args, 'png', False))
57
81
  is_chunked = getattr(args, 'chunk', False)
58
82
 
83
+ # Skip cache for chunked or when explicitly disabled
84
+ skip_cache = is_chunked or getattr(args, 'no_cache', False)
85
+
86
+ if not skip_cache and source_path:
87
+ cache = PersistentCache(str(source_path))
88
+ config_dict = _build_export_config(args, formats)
89
+ cached_export_dir = cache.get_export_cache_dir(config_dict)
90
+
91
+ if cached_export_dir:
92
+ if args.verbose:
93
+ print(f" Using cached export from: {cached_export_dir}")
94
+ # Copy cached files to output_dir
95
+ _copy_cached_export(cached_export_dir, output_dir, verbose=args.verbose)
96
+ return
97
+
59
98
  try:
60
99
  if is_chunked and source_path:
61
100
  _export_chunked(args, result, output_dir, source_path, formats, requested_formats)
62
101
  else:
63
102
  _export_single(args, result, output_dir, formats, requested_formats, source_path)
103
+
104
+ # Mark export as complete in cache
105
+ if not skip_cache and source_path:
106
+ cache = PersistentCache(str(source_path))
107
+ config_dict = _build_export_config(args, formats)
108
+ export_cache_dir = cache.create_export_cache_dir(config_dict)
109
+ _copy_to_cache(output_dir, export_cache_dir, verbose=args.verbose)
110
+ cache.mark_export_complete(export_cache_dir)
111
+ cache.save()
112
+ if args.verbose:
113
+ print(f" Export cached at: {export_cache_dir}")
114
+
64
115
  except Exception as e:
65
116
  print(f"Error during export: {e}", file=sys.stderr)
66
117
  sys.exit(1)
67
118
 
68
119
 
120
+ def _copy_cached_export(cached_dir: Path, output_dir: Path, verbose: bool = False) -> None:
121
+ """Copy files from cached export to output directory."""
122
+ output_dir.mkdir(parents=True, exist_ok=True)
123
+ items = [item for item in cached_dir.iterdir() if item.name != '_complete']
124
+
125
+ # Progress bar for large cache restores
126
+ use_tqdm = _HAS_TQDM and not verbose and len(items) > DEFAULT_PROGRESS_BAR_THRESHOLD
127
+ item_iterator = tqdm(items, desc="Restoring from cache") if use_tqdm else items
128
+
129
+ for item in item_iterator:
130
+ dest = output_dir / item.name
131
+ if item.is_dir():
132
+ shutil.copytree(item, dest, dirs_exist_ok=True)
133
+ else:
134
+ shutil.copy2(item, dest)
135
+
136
+
137
+ def _copy_to_cache(output_dir: Path, cache_dir: Path, verbose: bool = False) -> None:
138
+ """Copy export files to cache directory."""
139
+ cache_dir.mkdir(parents=True, exist_ok=True)
140
+ if not output_dir.exists():
141
+ return
142
+
143
+ items = list(output_dir.iterdir())
144
+ # Progress bar for large cache saves
145
+ use_tqdm = _HAS_TQDM and not verbose and len(items) > DEFAULT_PROGRESS_BAR_THRESHOLD
146
+ item_iterator = tqdm(items, desc="Saving to cache") if use_tqdm else items
147
+
148
+ for item in item_iterator:
149
+ dest = cache_dir / item.name
150
+ if item.is_dir():
151
+ shutil.copytree(item, dest, dirs_exist_ok=True)
152
+ else:
153
+ shutil.copy2(item, dest)
154
+
155
+
69
156
  def _expand_all_formats(requested: List[str], include_png: bool = False) -> List[str]:
70
157
  """Expand 'all' to concrete format list."""
71
158
  if 'all' not in requested:
@@ -121,7 +208,18 @@ def _export_single(
121
208
 
122
209
  def _export_registry_formats(args, result, output_dir: Path, formats: List[str]):
123
210
  """Export core formats via EXPORT_REGISTRY lookup."""
124
- for fmt in formats:
211
+ # Use progress bar when many formats and not in verbose mode
212
+ use_tqdm = (
213
+ _HAS_TQDM and
214
+ not args.verbose and
215
+ len(formats) > DEFAULT_PROGRESS_BAR_THRESHOLD
216
+ )
217
+
218
+ format_iterator = formats
219
+ if use_tqdm:
220
+ format_iterator = tqdm(formats, desc="Exporting formats")
221
+
222
+ for fmt in format_iterator:
125
223
  exporter_cls = get_exporter(fmt)
126
224
  if exporter_cls is None:
127
225
  continue
@@ -147,6 +147,18 @@ Strategy Options (--strategy):
147
147
  help='Force re-analysis and re-export even when cache is valid (alias for --no-cache)'
148
148
  )
149
149
 
150
+ parser.add_argument(
151
+ '--dry-run',
152
+ action='store_true',
153
+ help='Show what would be exported without writing files'
154
+ )
155
+
156
+ parser.add_argument(
157
+ '--watch',
158
+ action='store_true',
159
+ help='Auto-detect changed files and only re-analyze those (faster subsequent runs)'
160
+ )
161
+
150
162
  parser.add_argument(
151
163
  '--strategy',
152
164
  choices=['quick', 'standard', 'deep'],
@@ -7,9 +7,16 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
7
7
  from pathlib import Path
8
8
  from typing import Dict, List, Optional, Tuple
9
9
 
10
+ # Optional tqdm for progress bars
11
+ try:
12
+ from tqdm import tqdm
13
+ _HAS_TQDM = True
14
+ except ImportError:
15
+ _HAS_TQDM = False
16
+
10
17
  logger = logging.getLogger(__name__)
11
18
 
12
- from .config import Config, FAST_CONFIG, ALL_EXTENSIONS, LANGUAGE_EXTENSIONS
19
+ from .config import Config, FAST_CONFIG, ALL_EXTENSIONS, LANGUAGE_EXTENSIONS, DEFAULT_PROGRESS_BAR_THRESHOLD
13
20
  from .models import AnalysisResult, FlowEdge, FlowNode, Pattern
14
21
  from code2llm.analysis.call_graph import CallGraphExtractor
15
22
 
@@ -41,24 +48,15 @@ class ProjectAnalyzer:
41
48
 
42
49
  if self.config.verbose:
43
50
  print(f"Found {len(files)} files to analyze")
44
- print(f" - Parallel: {self.config.performance.parallel_enabled}, Workers: {self.config.performance.parallel_workers}")
51
+ workers = self.config.performance.get_workers()
52
+ print(f" - Parallel: {self.config.performance.parallel_enabled}, Workers: {workers}")
45
53
 
46
54
  pcache, cached_results, files_to_analyze = self._load_from_persistent_cache(files, project_path)
47
55
  fresh_results = self._run_analysis(files_to_analyze)
48
56
  self._store_to_persistent_cache(pcache, files_to_analyze, fresh_results)
49
57
 
50
58
  merged = self._merge_results(cached_results + fresh_results, str(project_path))
51
- self._build_call_graph(merged)
52
- if not self.config.performance.skip_pattern_detection:
53
- self._detect_patterns(merged)
54
- if self.config.verbose:
55
- print(f" - Running refactoring analysis...", flush=True)
56
- self.refactoring_analyzer.perform_refactoring_analysis(merged)
57
- if self.config.verbose:
58
- print(f" - Refactoring analysis complete", flush=True)
59
- merged.stats = self._build_stats(files, cached_results + fresh_results, merged, start_time)
60
- if self.config.verbose:
61
- self._print_summary(merged)
59
+ self._post_process(merged, files, cached_results + fresh_results, start_time)
62
60
  return merged
63
61
 
64
62
  def _resolve_project_path(self, project_path: str) -> Path:
@@ -146,6 +144,26 @@ class ProjectAnalyzer:
146
144
  print(f" Classes: {len(merged.classes)}")
147
145
  print(f" CFG Nodes: {len(merged.nodes)}")
148
146
  print(f" Patterns: {len(merged.patterns)}")
147
+
148
+ def _post_process(
149
+ self,
150
+ merged: AnalysisResult,
151
+ files: List,
152
+ results: List[Dict],
153
+ start_time: float,
154
+ ) -> None:
155
+ """Run post-processing: call graph, patterns, refactoring, stats."""
156
+ self._build_call_graph(merged)
157
+ if not self.config.performance.skip_pattern_detection:
158
+ self._detect_patterns(merged)
159
+ if self.config.verbose:
160
+ print(" - Running refactoring analysis...", flush=True)
161
+ self.refactoring_analyzer.perform_refactoring_analysis(merged)
162
+ if self.config.verbose:
163
+ print(" - Refactoring analysis complete", flush=True)
164
+ merged.stats = self._build_stats(files, results, merged, start_time)
165
+ if self.config.verbose:
166
+ self._print_summary(merged)
149
167
 
150
168
  def _collect_files(self, project_path: Path) -> List[Tuple[str, str]]:
151
169
  """Collect all source files with their module names for all supported languages.
@@ -197,7 +215,7 @@ class ProjectAnalyzer:
197
215
  def _analyze_parallel(self, files: List[Tuple[str, str]]) -> List[Dict]:
198
216
  """Analyze files in parallel."""
199
217
  results = []
200
- workers = min(self.config.performance.parallel_workers, len(files))
218
+ workers = min(self.config.performance.get_workers(), len(files))
201
219
 
202
220
  # Convert config to dict for pickle compatibility
203
221
  config_dict = {
@@ -215,9 +233,13 @@ class ProjectAnalyzer:
215
233
  for file_path, module_name in files
216
234
  }
217
235
 
218
- # Collect results as they complete
236
+ # Collect results as they complete (with optional progress bar)
219
237
  completed = 0
220
- for future in as_completed(future_to_file):
238
+ iterator = as_completed(future_to_file)
239
+ if not self.config.verbose and len(files) > DEFAULT_PROGRESS_BAR_THRESHOLD and _HAS_TQDM:
240
+ iterator = tqdm(iterator, total=len(files), desc="Analyzing")
241
+
242
+ for future in iterator:
221
243
  file_path, module_name = future_to_file[future]
222
244
  try:
223
245
  result = future.result()
@@ -238,7 +260,12 @@ class ProjectAnalyzer:
238
260
  analyzer = FileAnalyzer(self.config, self.cache)
239
261
  total = len(files)
240
262
 
241
- for i, (file_path, module_name) in enumerate(files, 1):
263
+ # Use tqdm for large projects in non-verbose mode
264
+ file_iterator = enumerate(files, 1)
265
+ if not self.config.verbose and total > DEFAULT_PROGRESS_BAR_THRESHOLD and _HAS_TQDM:
266
+ file_iterator = tqdm(list(file_iterator), desc="Analyzing", total=total)
267
+
268
+ for i, (file_path, module_name) in file_iterator:
242
269
  try:
243
270
  result = analyzer.analyze_file(file_path, module_name)
244
271
  if result:
@@ -355,47 +382,19 @@ class ProjectAnalyzer:
355
382
 
356
383
  def analyze_files(self, files: List[Tuple[str, str]], project_path: str) -> AnalysisResult:
357
384
  """Analyze specific list of files (for chunked analysis).
358
-
385
+
359
386
  Args:
360
387
  files: List of (file_path, module_name) tuples
361
388
  project_path: Base project path for the result
362
389
  """
363
390
  start_time = time.time()
364
-
391
+
365
392
  if self.config.verbose:
366
393
  print(f"Analyzing {len(files)} specific files")
367
-
368
- # Analyze files
369
- if self.config.performance.parallel_enabled and len(files) > 1:
370
- results = self._analyze_parallel(files)
371
- else:
372
- results = self._analyze_sequential(files)
373
-
374
- # Merge results
394
+
395
+ results = self._run_analysis(files)
375
396
  merged = self._merge_results(results, project_path)
376
-
377
- # Build call graph
378
- self._build_call_graph(merged)
379
-
380
- if not self.config.performance.skip_pattern_detection:
381
- self._detect_patterns(merged)
382
-
383
- # Refactoring analysis
384
- self.refactoring_analyzer.perform_refactoring_analysis(merged)
385
-
386
- # Calculate stats
387
- elapsed = time.time() - start_time
388
- merged.stats = {
389
- 'files_processed': len(files),
390
- 'functions_found': len(merged.functions),
391
- 'classes_found': len(merged.classes),
392
- 'nodes_created': len(merged.nodes),
393
- 'edges_created': len(merged.edges),
394
- 'patterns_detected': len(merged.patterns),
395
- 'analysis_time_seconds': round(elapsed, 2),
396
- 'cache_hits': sum(r.get('cache_hits', 0) for r in results),
397
- }
398
-
397
+ self._post_process(merged, files, results, start_time)
399
398
  return merged
400
399
 
401
400
  def _detect_patterns(self, result: AnalysisResult) -> None:
@@ -1,10 +1,62 @@
1
1
  """Configuration and constants for code2llm."""
2
2
 
3
+ import os
4
+ import psutil
3
5
  from dataclasses import dataclass, field
4
6
  from typing import List, Set
5
7
  from enum import Enum
6
8
 
7
9
 
10
+ def _get_optimal_workers(default: int = 4, max_per_gb: float = 2.0) -> int:
11
+ """Calculate optimal parallel workers based on CPU and available RAM.
12
+
13
+ Args:
14
+ default: Default workers if detection fails
15
+ max_per_gb: Max workers per GB of RAM
16
+
17
+ Returns:
18
+ Optimal worker count (at least 1)
19
+ """
20
+ try:
21
+ cpu_count = os.cpu_count() or default
22
+ available_ram_gb = psutil.virtual_memory().available / (1024 ** 3)
23
+ # Limit workers by RAM (assume each worker needs ~500MB)
24
+ ram_limited = int(available_ram_gb * max_per_gb)
25
+ # Take minimum of CPU and RAM limits, but at least 1
26
+ return max(1, min(cpu_count, ram_limited))
27
+ except Exception:
28
+ return default
29
+
30
+
31
+ # Performance limits (named constants for magic numbers)
32
+ DEFAULT_MAX_NODES_PER_FILE = 1000
33
+ DEFAULT_MAX_TOTAL_NODES = 10000
34
+ DEFAULT_MAX_EDGES = 50000
35
+ DEFAULT_CACHE_TTL_HOURS = 24
36
+ DEFAULT_MAX_MEMORY_MB = 2048
37
+ DEFAULT_PROGRESS_BAR_THRESHOLD = 50 # File count threshold for progress bar
38
+
39
+ # Complexity thresholds
40
+ CC_LOW_THRESHOLD = 5 # Rank A
41
+ CC_MEDIUM_THRESHOLD = 10 # Rank B
42
+ CC_HIGH_THRESHOLD = 20 # Rank C
43
+ CC_CRITICAL_THRESHOLD = 50 # For warnings
44
+
45
+ # Size limits
46
+ KB = 1024
47
+ MB = 1024 * 1024
48
+ MAX_FILE_SIZE_KB = 256
49
+ CHUNK_SIZE_KB = 256
50
+
51
+ # Timeouts
52
+ DEFAULT_PNG_TIMEOUT = 60
53
+ DEFAULT_MERMAID_MAX_TEXT_SIZE = 2_000_000
54
+ DEFAULT_MERMAID_MAX_EDGES = 20_000
55
+
56
+ # Cache settings
57
+ DEFAULT_CACHE_MAX_AGE_DAYS = 30
58
+
59
+
8
60
  class AnalysisMode(str, Enum):
9
61
  """Available analysis modes."""
10
62
  STATIC = "static"
@@ -19,17 +71,23 @@ class PerformanceConfig:
19
71
  """Performance optimization settings."""
20
72
  enable_cache: bool = True
21
73
  cache_dir: str = ".code2llm_cache"
22
- cache_ttl_hours: int = 24
23
- parallel_workers: int = 4
74
+ cache_ttl_hours: int = DEFAULT_CACHE_TTL_HOURS
75
+ parallel_workers: int = 0 # 0 = auto-detect based on CPU/RAM
24
76
  parallel_enabled: bool = True
25
- max_memory_mb: int = 2048
26
- max_nodes_per_file: int = 1000
27
- max_total_nodes: int = 10000
28
- max_edges: int = 50000
77
+ max_memory_mb: int = DEFAULT_MAX_MEMORY_MB
78
+ max_nodes_per_file: int = DEFAULT_MAX_NODES_PER_FILE
79
+ max_total_nodes: int = DEFAULT_MAX_TOTAL_NODES
80
+ max_edges: int = DEFAULT_MAX_EDGES
29
81
  fast_mode: bool = False
30
82
  skip_data_flow: bool = False
31
83
  skip_pattern_detection: bool = False
32
84
 
85
def get_workers(self) -> int:
    """Return the effective worker count.

    A positive ``parallel_workers`` is used as-is; zero or a negative
    value delegates to ``_get_optimal_workers`` for auto-detection.
    """
    requested = self.parallel_workers
    return _get_optimal_workers(default=4) if requested <= 0 else requested
90
+
33
91
 
34
92
  @dataclass
35
93
  class FilterConfig:
@@ -120,7 +178,7 @@ FAST_CONFIG = Config(
120
178
  skip_data_flow=True,
121
179
  skip_pattern_detection=True,
122
180
  parallel_enabled=True,
123
- parallel_workers=8,
181
+ parallel_workers=0, # auto-detect
124
182
  max_nodes_per_file=500,
125
183
  max_total_nodes=5000,
126
184
  ),
@@ -1,17 +1,13 @@
1
1
  """File analyzer for analyzing individual source files across multiple languages."""
2
2
 
3
3
  import ast
4
- import re
5
4
  from pathlib import Path
6
5
  from typing import Dict, List, Optional
7
6
 
8
7
  from radon.complexity import cc_visit, cc_rank
9
8
 
10
- from .config import Config, LANGUAGE_EXTENSIONS
11
- from .models import (
12
- AnalysisResult, ClassInfo, FlowEdge, FlowNode,
13
- FunctionInfo, ModuleInfo
14
- )
9
+ from .config import Config
10
+ from .models import ClassInfo, FlowEdge, FlowNode, FunctionInfo, ModuleInfo
15
11
  from code2llm.analysis.dfg import DFGExtractor
16
12
  from code2llm.analysis.call_graph import CallGraphExtractor
17
13
  from .file_filter import FastFileFilter
@@ -1,7 +1,7 @@
1
1
  """Gitignore support for code2llm file filtering."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import List, Set
4
+ from typing import List
5
5
  import re
6
6
 
7
7
 
@@ -8,11 +8,9 @@ Provides:
8
8
  """
9
9
 
10
10
  from abc import ABC, abstractmethod
11
- from typing import Dict, Any, Callable, Set, Optional
11
+ from typing import Dict, Any, Callable, Optional
12
12
  from pathlib import Path
13
13
 
14
- from code2llm.core.models import ModuleInfo, FunctionInfo, ClassInfo
15
-
16
14
 
17
15
  # Type alias for parser results
18
16
  ParserResult = Dict[str, Any]
@@ -3,6 +3,12 @@
3
3
  import re
4
4
  from typing import Dict, List
5
5
 
6
+ from code2llm.core.config import (
7
+ CC_LOW_THRESHOLD,
8
+ CC_MEDIUM_THRESHOLD,
9
+ CC_HIGH_THRESHOLD,
10
+ )
11
+
6
12
 
7
13
  # Branching keywords per language family
8
14
  CC_PATTERNS = {
@@ -64,7 +70,11 @@ def calculate_complexity_regex(content: str, result: Dict,
64
70
  cc = 1
65
71
  else:
66
72
  cc = 1 + len(pattern.findall(body))
67
- rank = 'A' if cc <= 5 else ('B' if cc <= 10 else ('C' if cc <= 20 else 'D'))
73
+ rank = (
74
+ 'A' if cc <= CC_LOW_THRESHOLD
75
+ else ('B' if cc <= CC_MEDIUM_THRESHOLD
76
+ else ('C' if cc <= CC_HIGH_THRESHOLD else 'D'))
77
+ )
68
78
  func_info.complexity = {
69
79
  'cyclomatic_complexity': cc,
70
80
  'cc_rank': rank,
@@ -1,6 +1,5 @@
1
1
  from dataclasses import dataclass, field, asdict
2
2
  from typing import List, Dict, Set, Optional, Any
3
- from pathlib import Path
4
3
 
5
4
 
6
5
  class BaseModel:
@@ -1,7 +1,6 @@
1
1
  """Refactoring analysis for code2llm."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import Optional
5
4
 
6
5
  from .config import Config
7
6
  from .models import AnalysisResult