code2llm 0.5.119__tar.gz → 0.5.121__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. {code2llm-0.5.119 → code2llm-0.5.121}/PKG-INFO +2 -2
  2. {code2llm-0.5.119 → code2llm-0.5.121}/README.md +1 -1
  3. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/__init__.py +1 -1
  4. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/__init__.py +4 -0
  5. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/call_graph.py +1 -4
  6. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/cfg.py +1 -4
  7. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/data_analysis.py +5 -0
  8. code2llm-0.5.121/code2llm/analysis/pipeline_classifier.py +100 -0
  9. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/pipeline_detector.py +29 -173
  10. code2llm-0.5.121/code2llm/analysis/pipeline_resolver.py +91 -0
  11. code2llm-0.5.121/code2llm/exporters/dashboard_data.py +163 -0
  12. code2llm-0.5.119/code2llm/exporters/html_dashboard.py → code2llm-0.5.121/code2llm/exporters/dashboard_renderer.py +47 -209
  13. code2llm-0.5.121/code2llm/exporters/html_dashboard.py +68 -0
  14. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/map_exporter.py +9 -12
  15. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon/helpers.py +2 -16
  16. code2llm-0.5.121/code2llm/exporters/toon/metrics.py +98 -0
  17. code2llm-0.5.119/code2llm/exporters/toon/metrics.py → code2llm-0.5.121/code2llm/exporters/toon/metrics_core.py +41 -237
  18. code2llm-0.5.121/code2llm/exporters/toon/metrics_duplicates.py +78 -0
  19. code2llm-0.5.121/code2llm/exporters/toon/metrics_health.py +98 -0
  20. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/llm_flow.py +11 -0
  21. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/mermaid.py +77 -79
  22. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/__init__.py +1 -1
  23. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/PKG-INFO +2 -2
  24. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/SOURCES.txt +7 -0
  25. {code2llm-0.5.119 → code2llm-0.5.121}/pyproject.toml +1 -1
  26. {code2llm-0.5.119 → code2llm-0.5.121}/setup.py +1 -1
  27. {code2llm-0.5.119 → code2llm-0.5.121}/LICENSE +0 -0
  28. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/__main__.py +0 -0
  29. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/coupling.py +0 -0
  30. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/dfg.py +0 -0
  31. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/side_effects.py +0 -0
  32. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/smells.py +0 -0
  33. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/type_inference.py +0 -0
  34. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/utils/__init__.py +0 -0
  35. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/utils/ast_helpers.py +0 -0
  36. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/api.py +0 -0
  37. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli.py +0 -0
  38. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_analysis.py +0 -0
  39. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_commands.py +0 -0
  40. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/__init__.py +0 -0
  41. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/code2logic.py +0 -0
  42. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/formats.py +0 -0
  43. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/orchestrator.py +0 -0
  44. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/prompt.py +0 -0
  45. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_parser.py +0 -0
  46. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/__init__.py +0 -0
  47. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/analyzer.py +0 -0
  48. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/ast_registry.py +0 -0
  49. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/config.py +0 -0
  50. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/export_pipeline.py +0 -0
  51. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/file_analyzer.py +0 -0
  52. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/file_cache.py +0 -0
  53. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/file_filter.py +0 -0
  54. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/gitignore.py +0 -0
  55. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/incremental.py +0 -0
  56. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/__init__.py +0 -0
  57. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/base.py +0 -0
  58. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/cpp.py +0 -0
  59. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/csharp.py +0 -0
  60. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/generic.py +0 -0
  61. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/go_lang.py +0 -0
  62. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/java.py +0 -0
  63. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/php.py +0 -0
  64. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/ruby.py +0 -0
  65. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/rust.py +0 -0
  66. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/ts_extractors.py +0 -0
  67. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/ts_parser.py +0 -0
  68. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/typescript.py +0 -0
  69. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/large_repo.py +0 -0
  70. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/models.py +0 -0
  71. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/persistent_cache.py +0 -0
  72. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/refactoring.py +0 -0
  73. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/repo_files.py +0 -0
  74. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/__init__.py +0 -0
  75. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/cache.py +0 -0
  76. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/incremental.py +0 -0
  77. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/prioritizer.py +0 -0
  78. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/scanner.py +0 -0
  79. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/strategies.py +0 -0
  80. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming_analyzer.py +0 -0
  81. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/toon_size_manager.py +0 -0
  82. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/__init__.py +0 -0
  83. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/article_view.py +0 -0
  84. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/base.py +0 -0
  85. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/context_exporter.py +0 -0
  86. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/context_view.py +0 -0
  87. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/evolution_exporter.py +0 -0
  88. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/flow_constants.py +0 -0
  89. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/flow_exporter.py +0 -0
  90. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/flow_renderer.py +0 -0
  91. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/index_generator/__init__.py +0 -0
  92. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/index_generator/renderer.py +0 -0
  93. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/index_generator/scanner.py +0 -0
  94. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/index_generator.py +0 -0
  95. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/json_exporter.py +0 -0
  96. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/llm_exporter.py +0 -0
  97. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/mermaid_exporter.py +0 -0
  98. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/mermaid_flow_helpers.py +0 -0
  99. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/__init__.py +0 -0
  100. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/constants.py +0 -0
  101. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/core.py +0 -0
  102. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/evolution.py +0 -0
  103. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/health.py +0 -0
  104. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/hotspots.py +0 -0
  105. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/modules.py +0 -0
  106. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml_exporter.py +0 -0
  107. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/readme_exporter.py +0 -0
  108. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/report_generators.py +0 -0
  109. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon/__init__.py +0 -0
  110. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon/module_detail.py +0 -0
  111. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon/renderer.py +0 -0
  112. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon.py +0 -0
  113. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon_view.py +0 -0
  114. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/validate_project.py +0 -0
  115. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/yaml_exporter.py +0 -0
  116. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/__init__.py +0 -0
  117. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/_utils.py +0 -0
  118. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/llm_task.py +0 -0
  119. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/config.py +0 -0
  120. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/entity_resolution.py +0 -0
  121. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/intent_matching.py +0 -0
  122. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/normalization.py +0 -0
  123. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/pipeline.py +0 -0
  124. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/patterns/__init__.py +0 -0
  125. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/patterns/detector.py +0 -0
  126. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/refactor/__init__.py +0 -0
  127. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/refactor/prompt_engine.py +0 -0
  128. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/dependency_links.txt +0 -0
  129. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/entry_points.txt +0 -0
  130. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/requires.txt +0 -0
  131. {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/top_level.txt +0 -0
  132. {code2llm-0.5.119 → code2llm-0.5.121}/setup.cfg +0 -0
  133. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_advanced_analysis.py +0 -0
  134. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_analyzer.py +0 -0
  135. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_calls_toon_export.py +0 -0
  136. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_deep_analysis.py +0 -0
  137. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_edge_cases.py +0 -0
  138. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_flow_exporter.py +0 -0
  139. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_format_quality.py +0 -0
  140. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_multilanguage_e2e.py +0 -0
  141. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_nlp_pipeline.py +0 -0
  142. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_nonpython_cc_calls.py +0 -0
  143. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_persistent_cache.py +0 -0
  144. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_pipeline_detector.py +0 -0
  145. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_project_toon_export.py +0 -0
  146. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_prompt_engine.py +0 -0
  147. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_prompt_txt.py +0 -0
  148. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_refactoring_engine.py +0 -0
  149. {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_toon_v2.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code2llm
3
- Version: 0.5.119
3
+ Version: 0.5.121
4
4
  Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
5
5
  Home-page: https://github.com/wronai/stts
6
6
  Author: STTS Project
@@ -67,7 +67,7 @@ Dynamic: requires-python
67
67
 
68
68
  ## AI Cost Tracking
69
69
 
70
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.119-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
70
+ ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.121-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
71
71
  ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-57.3h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
72
72
 
73
73
  - 🤖 **LLM usage:** $7.5000 (166 commits)
@@ -3,7 +3,7 @@
3
3
 
4
4
  ## AI Cost Tracking
5
5
 
6
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.119-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
6
+ ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.121-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
7
7
  ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-57.3h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
8
8
 
9
9
  - 🤖 **LLM usage:** $7.5000 (166 commits)
@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
8
8
  and entity resolution with multilingual support.
9
9
  """
10
10
 
11
- __version__ = "0.5.119"
11
+ __version__ = "0.5.121"
12
12
  __author__ = "STTS Project"
13
13
 
14
14
  # Core analysis components (lightweight, always needed)
@@ -10,6 +10,8 @@ __all__ = [
10
10
  'TypeInferenceEngine',
11
11
  'SideEffectDetector',
12
12
  'PipelineDetector',
13
+ 'PipelineResolver',
14
+ 'PipelineClassifier',
13
15
  ]
14
16
 
15
17
 
@@ -25,6 +27,8 @@ def __getattr__(name):
25
27
  'TypeInferenceEngine': '.type_inference',
26
28
  'SideEffectDetector': '.side_effects',
27
29
  'PipelineDetector': '.pipeline_detector',
30
+ 'PipelineResolver': '.pipeline_resolver',
31
+ 'PipelineClassifier': '.pipeline_classifier',
28
32
  }
29
33
  if name in _imports:
30
34
  import importlib
@@ -97,7 +97,7 @@ class CallGraphExtractor(ast.NodeVisitor):
97
97
 
98
98
  def visit_FunctionDef(self, node: ast.FunctionDef):
99
99
  """Visit function definition and track calls within it."""
100
- func_name = self._qualified_name(node.name)
100
+ func_name = qualified_name(self.module_name, self.class_stack, node.name)
101
101
  self.function_stack.append(func_name)
102
102
 
103
103
  # Visit body to find calls
@@ -139,9 +139,6 @@ class CallGraphExtractor(ast.NodeVisitor):
139
139
 
140
140
  self.generic_visit(node)
141
141
 
142
- def _qualified_name(self, name: str) -> str:
143
- return qualified_name(self.module_name, self.class_stack, name)
144
-
145
142
  def _resolve_call(self, node: ast.AST) -> Optional[str]:
146
143
  """Resolve a call to its full name."""
147
144
  if isinstance(node, ast.Name):
@@ -69,7 +69,7 @@ class CFGExtractor(ast.NodeVisitor):
69
69
 
70
70
  def visit_FunctionDef(self, node: ast.FunctionDef):
71
71
  """Visit function definition."""
72
- func_name = self._qualified_name(node.name)
72
+ func_name = qualified_name(self.module_name, self.class_stack, node.name)
73
73
  self.function_stack.append(func_name)
74
74
 
75
75
  # Create entry node
@@ -260,9 +260,6 @@ class CFGExtractor(ast.NodeVisitor):
260
260
  else:
261
261
  self.generic_visit(node)
262
262
 
263
- def _qualified_name(self, name: str) -> str:
264
- return qualified_name(self.module_name, self.class_stack, name)
265
-
266
263
  def _extract_condition(self, node: ast.AST) -> str:
267
264
  """Extract condition as string."""
268
265
  try:
@@ -206,6 +206,7 @@ class DataAnalyzer:
206
206
  return sorted(data_types.values(), key=lambda x: x['usage_count'], reverse=True)
207
207
 
208
208
  def _infer_parameter_types(self, func) -> list:
209
+ """Infer parameter types from function name patterns."""
209
210
  params = []
210
211
  name = func.name.lower()
211
212
  if 'list' in name or 'items' in name: params.append('list')
@@ -215,6 +216,7 @@ class DataAnalyzer:
215
216
  return params
216
217
 
217
218
  def _infer_return_types(self, func) -> list:
219
+ """Infer return types from function name patterns."""
218
220
  returns = []
219
221
  name = func.name.lower()
220
222
  if name.startswith(('get_', 'find_')): returns.append('dict')
@@ -224,6 +226,7 @@ class DataAnalyzer:
224
226
  return returns
225
227
 
226
228
  def _build_data_flow_graph(self, result: AnalysisResult) -> dict:
229
+ """Build data flow graph from function relationships."""
227
230
  nodes = {}
228
231
  edges = []
229
232
  for func_name, func in result.functions.items():
@@ -255,6 +258,7 @@ class DataAnalyzer:
255
258
  return list(set(types))
256
259
 
257
260
  def _identify_process_patterns(self, result: AnalysisResult) -> list:
261
+ """Identify common data processing patterns (filter, map, reduce, etc.)."""
258
262
  patterns = {'filter': [], 'map': [], 'reduce': [], 'aggregate': [], 'transform': [], 'validate': []}
259
263
  indicators = {
260
264
  'filter': ['filter', 'select', 'where', 'find'], 'map': ['map', 'transform', 'process'],
@@ -274,6 +278,7 @@ class DataAnalyzer:
274
278
  return sorted(res, key=lambda x: x['count'], reverse=True)
275
279
 
276
280
  def _analyze_optimization_opportunities(self, result: AnalysisResult, data_types: list, dfg: dict) -> dict:
281
+ """Analyze optimization opportunities in data handling."""
277
282
  opt = {'potential_score': 0.0, 'type_consolidation': [], 'process_consolidation': [], 'hub_optimization': [], 'recommendations': []}
278
283
  similar = {}
279
284
  for dt in data_types:
@@ -0,0 +1,100 @@
1
+ """Pipeline Classifier — domain classification and naming for pipelines.
2
+
3
+ Groups pipelines by module domain (NLP, Analysis, Export, Refactor, etc.)
4
+ and derives human-readable pipeline names.
5
+ """
6
+
7
+ from collections import defaultdict
8
+ from typing import Dict, List, Optional
9
+
10
+ from code2llm.core.models import FunctionInfo
11
+ from code2llm.analysis.type_inference import TypeInferenceEngine
12
+
13
+ # Module-to-domain mapping heuristics
14
+ DOMAIN_KEYWORDS: Dict[str, List[str]] = {
15
+ "NLP": ["nlp", "natural", "language", "intent", "entity",
16
+ "query", "normalize", "tokenize", "match"],
17
+ "Analysis": ["analysis", "analyzer", "analyse", "analyze",
18
+ "metric", "complexity", "cfg", "dfg", "call_graph"],
19
+ "Export": ["export", "exporter", "render", "format", "output",
20
+ "toon", "mermaid", "json_export", "yaml_export"],
21
+ "Refactor": ["refactor", "smell", "suggest", "fix", "patch",
22
+ "template", "prompt", "engine"],
23
+ "Core": ["core", "config", "model", "base", "util", "helper"],
24
+ "IO": ["io", "file", "path", "read", "write", "load", "save",
25
+ "cache", "storage"],
26
+ }
27
+
28
+
29
+ class PipelineClassifier:
30
+ """Classify pipelines by domain and derive human-readable names."""
31
+
32
+ def __init__(self, type_engine: Optional[TypeInferenceEngine] = None):
33
+ self._type_engine = type_engine or TypeInferenceEngine()
34
+
35
+ def classify_domain(
36
+ self, path: List[str], funcs: Dict[str, FunctionInfo]
37
+ ) -> str:
38
+ """Classify pipeline domain by analyzing module names and function names."""
39
+ scores: Dict[str, int] = defaultdict(int)
40
+
41
+ for qname in path:
42
+ fi = funcs.get(qname)
43
+ if not fi:
44
+ continue
45
+ text = f"{fi.module} {fi.name}".lower()
46
+ for domain, keywords in DOMAIN_KEYWORDS.items():
47
+ for kw in keywords:
48
+ if kw in text:
49
+ scores[domain] += 1
50
+
51
+ if scores:
52
+ return max(scores, key=scores.get)
53
+ return "Unknown"
54
+
55
+ def derive_pipeline_name(
56
+ self,
57
+ path: List[str],
58
+ funcs: Dict[str, FunctionInfo],
59
+ domain: str,
60
+ ) -> str:
61
+ """Derive a human-readable pipeline name."""
62
+ # Use the dominant sub-module name
63
+ module_counts: Dict[str, int] = defaultdict(int)
64
+ for qname in path:
65
+ fi = funcs.get(qname)
66
+ if fi:
67
+ parts = fi.module.split(".")
68
+ # Use most specific module component
69
+ for part in parts:
70
+ if part and part not in ("code2llm", "__init__"):
71
+ module_counts[part] += 1
72
+
73
+ if module_counts:
74
+ dominant = max(module_counts, key=module_counts.get)
75
+ # Capitalize and use domain if module name is generic
76
+ if dominant in ("core", "base", "utils", "helpers"):
77
+ return domain
78
+ return dominant.capitalize()
79
+
80
+ return domain
81
+
82
+ def get_entry_type(self, fi: Optional[FunctionInfo]) -> str:
83
+ """Get the input type of a pipeline's entry point."""
84
+ if not fi:
85
+ return "?"
86
+ args = self._type_engine.get_arg_types(fi)
87
+ for arg in args:
88
+ if arg["name"] == "self":
89
+ continue
90
+ if arg.get("type"):
91
+ return arg["type"]
92
+ return arg["name"]
93
+ return "?"
94
+
95
+ def get_exit_type(self, fi: Optional[FunctionInfo]) -> str:
96
+ """Get the output type of a pipeline's exit point."""
97
+ if not fi:
98
+ return "?"
99
+ ret = self._type_engine.get_return_type(fi)
100
+ return ret if ret else "?"
@@ -8,18 +8,20 @@ Uses call graph analysis with networkx to:
8
8
  - Aggregate purity per pipeline using SideEffectDetector
9
9
 
10
10
  Sprint 3 (v0.3.2): Replaces the custom DFS chain-tracing in FlowExporter.
11
+ Refactored v0.5.x: Extracted resolver and classifier into separate modules.
11
12
  """
12
13
 
13
14
  import logging
14
- from collections import defaultdict
15
15
  from dataclasses import dataclass, field
16
- from typing import Any, Dict, List, Optional, Set, Tuple
16
+ from typing import Any, Dict, List, Optional, Set
17
17
 
18
18
  import networkx as nx
19
19
 
20
- from code2llm.core.models import AnalysisResult, FunctionInfo
20
+ from code2llm.core.models import FunctionInfo
21
21
  from .side_effects import SideEffectDetector, SideEffectInfo
22
22
  from .type_inference import TypeInferenceEngine
23
+ from .pipeline_resolver import PipelineResolver
24
+ from .pipeline_classifier import PipelineClassifier, DOMAIN_KEYWORDS
23
25
 
24
26
  logger = logging.getLogger(__name__)
25
27
 
@@ -28,28 +30,6 @@ MIN_PIPELINE_LENGTH = 3
28
30
  MAX_PIPELINES = 12
29
31
  CC_HIGH = 15
30
32
 
31
- # Patterns to exclude from analysis
32
- EXCLUDE_PATTERNS = frozenset({
33
- 'venv', '.venv', 'env', '.env', 'publish-env', 'test-env',
34
- 'site-packages', 'node_modules', '__pycache__', '.git',
35
- 'dist', 'build', 'egg-info', '.tox', '.mypy_cache',
36
- })
37
-
38
- # Module-to-domain mapping heuristics
39
- DOMAIN_KEYWORDS: Dict[str, List[str]] = {
40
- "NLP": ["nlp", "natural", "language", "intent", "entity",
41
- "query", "normalize", "tokenize", "match"],
42
- "Analysis": ["analysis", "analyzer", "analyse", "analyze",
43
- "metric", "complexity", "cfg", "dfg", "call_graph"],
44
- "Export": ["export", "exporter", "render", "format", "output",
45
- "toon", "mermaid", "json_export", "yaml_export"],
46
- "Refactor": ["refactor", "smell", "suggest", "fix", "patch",
47
- "template", "prompt", "engine"],
48
- "Core": ["core", "config", "model", "base", "util", "helper"],
49
- "IO": ["io", "file", "path", "read", "write", "load", "save",
50
- "cache", "storage"],
51
- }
52
-
53
33
 
54
34
  @dataclass
55
35
  class PipelineStage:
@@ -112,11 +92,28 @@ class Pipeline:
112
92
  }
113
93
 
114
94
 
95
+ # Re-export for backward compatibility
96
+ __all__ = [
97
+ 'PipelineDetector',
98
+ 'Pipeline',
99
+ 'PipelineStage',
100
+ 'PipelineResolver',
101
+ 'PipelineClassifier',
102
+ 'DOMAIN_KEYWORDS',
103
+ 'MIN_PIPELINE_LENGTH',
104
+ 'MAX_PIPELINES',
105
+ 'CC_HIGH',
106
+ ]
107
+
108
+
115
109
  class PipelineDetector:
116
110
  """Detect pipelines in a codebase using networkx graph analysis.
117
111
 
118
112
  Builds a call graph as a DiGraph, finds longest paths as pipeline
119
113
  candidates, groups by module domain, and labels entry/exit points.
114
+
115
+ Refactored to delegate resolution and classification to specialized
116
+ helper classes: PipelineResolver and PipelineClassifier.
120
117
  """
121
118
 
122
119
  def __init__(
@@ -126,6 +123,8 @@ class PipelineDetector:
126
123
  ):
127
124
  self._type_engine = type_engine or TypeInferenceEngine()
128
125
  self._se_detector = side_effect_detector or SideEffectDetector()
126
+ self._resolver = PipelineResolver()
127
+ self._classifier = PipelineClassifier(self._type_engine)
129
128
 
130
129
  def detect(
131
130
  self,
@@ -177,7 +176,7 @@ class PipelineDetector:
177
176
 
178
177
  for qname, fi in funcs.items():
179
178
  for callee in fi.calls:
180
- resolved = self._resolve_callee(callee, funcs, caller=fi)
179
+ resolved = self._resolver.resolve(callee, funcs, caller=fi)
181
180
  if resolved and resolved != qname: # no self-loops
182
181
  G.add_edge(qname, resolved)
183
182
 
@@ -303,8 +302,8 @@ class PipelineDetector:
303
302
  if not stages:
304
303
  continue
305
304
 
306
- domain = self._classify_domain(path, funcs)
307
- name = self._derive_pipeline_name(path, funcs, domain)
305
+ domain = self._classifier.classify_domain(path, funcs)
306
+ name = self._classifier.derive_pipeline_name(path, funcs, domain)
308
307
 
309
308
  # Entry/exit labeling
310
309
  stages[0].is_entry = True
@@ -315,8 +314,8 @@ class PipelineDetector:
315
314
  bottleneck = max(stages, key=lambda s: s.cc) if stages else None
316
315
 
317
316
  # Entry/exit types
318
- entry_type = self._get_entry_type(funcs.get(path[0]))
319
- exit_type = self._get_exit_type(funcs.get(path[-1]))
317
+ entry_type = self._classifier.get_entry_type(funcs.get(path[0]))
318
+ exit_type = self._classifier.get_exit_type(funcs.get(path[-1]))
320
319
 
321
320
  pipeline = Pipeline(
322
321
  name=name,
@@ -361,146 +360,3 @@ class PipelineDetector:
361
360
  side_effect_summary=se_summary,
362
361
  ))
363
362
  return stages
364
-
365
- # ------------------------------------------------------------------
366
- # domain classification
367
- # ------------------------------------------------------------------
368
- def _classify_domain(
369
- self, path: List[str], funcs: Dict[str, FunctionInfo]
370
- ) -> str:
371
- """Classify pipeline domain by analyzing module names and function names."""
372
- scores: Dict[str, int] = defaultdict(int)
373
-
374
- for qname in path:
375
- fi = funcs.get(qname)
376
- if not fi:
377
- continue
378
- text = f"{fi.module} {fi.name}".lower()
379
- for domain, keywords in DOMAIN_KEYWORDS.items():
380
- for kw in keywords:
381
- if kw in text:
382
- scores[domain] += 1
383
-
384
- if scores:
385
- return max(scores, key=scores.get)
386
- return "Unknown"
387
-
388
- def _derive_pipeline_name(
389
- self, path: List[str],
390
- funcs: Dict[str, FunctionInfo],
391
- domain: str,
392
- ) -> str:
393
- """Derive a human-readable pipeline name."""
394
- # Use the dominant sub-module name
395
- module_counts: Dict[str, int] = defaultdict(int)
396
- for qname in path:
397
- fi = funcs.get(qname)
398
- if fi:
399
- parts = fi.module.split(".")
400
- # Use most specific module component
401
- for part in parts:
402
- if part and part not in ("code2llm", "__init__"):
403
- module_counts[part] += 1
404
-
405
- if module_counts:
406
- dominant = max(module_counts, key=module_counts.get)
407
- # Capitalize and use domain if module name is generic
408
- if dominant in ("core", "base", "utils", "helpers"):
409
- return domain
410
- return dominant.capitalize()
411
-
412
- return domain
413
-
414
- # ------------------------------------------------------------------
415
- # type helpers
416
- # ------------------------------------------------------------------
417
- def _get_entry_type(self, fi: Optional[FunctionInfo]) -> str:
418
- """Get the input type of a pipeline's entry point."""
419
- if not fi:
420
- return "?"
421
- args = self._type_engine.get_arg_types(fi)
422
- for arg in args:
423
- if arg["name"] == "self":
424
- continue
425
- if arg.get("type"):
426
- return arg["type"]
427
- return arg["name"]
428
- return "?"
429
-
430
- def _get_exit_type(self, fi: Optional[FunctionInfo]) -> str:
431
- """Get the output type of a pipeline's exit point."""
432
- if not fi:
433
- return "?"
434
- ret = self._type_engine.get_return_type(fi)
435
- return ret if ret else "?"
436
-
437
- # ------------------------------------------------------------------
438
- # callee resolution
439
- # ------------------------------------------------------------------
440
- def _resolve_callee(
441
- self, callee: str, funcs: Dict[str, FunctionInfo],
442
- caller: Optional[FunctionInfo] = None,
443
- ) -> Optional[str]:
444
- """Resolve callee name to qualified name.
445
-
446
- Handles:
447
- - Direct qualified matches
448
- - self.method → same-class method resolution
449
- - Unqualified names with same-class preference
450
-
451
- Returns None for ambiguous matches (multiple candidates)
452
- to avoid creating phantom pipeline edges.
453
- """
454
- # Direct match
455
- if callee in funcs:
456
- return callee
457
-
458
- bare, is_self_call = self._strip_self_prefix(callee)
459
-
460
- # Try same-class resolution first
461
- if result := self._try_same_class_resolution(bare, caller, funcs):
462
- return result
463
-
464
- # Suffix match
465
- candidates = self._get_suffix_candidates(bare, funcs)
466
- if len(candidates) == 1:
467
- return candidates[0]
468
-
469
- # Prefer same-class candidates for method calls
470
- return self._select_same_class_candidate(candidates, caller, is_self_call)
471
-
472
- def _strip_self_prefix(self, callee: str) -> Tuple[str, bool]:
473
- """Strip self. prefix and return bare name + flag."""
474
- if callee.startswith("self."):
475
- return callee[5:], True
476
- return callee, False
477
-
478
- def _try_same_class_resolution(
479
- self, bare: str, caller: Optional[FunctionInfo], funcs: Dict[str, FunctionInfo]
480
- ) -> Optional[str]:
481
- """Try to resolve method in the same class as caller."""
482
- if caller and caller.class_name:
483
- class_prefix = f"{caller.module}.{caller.class_name}."
484
- class_candidate = class_prefix + bare
485
- if class_candidate in funcs:
486
- return class_candidate
487
- return None
488
-
489
- def _get_suffix_candidates(self, bare: str, funcs: Dict[str, FunctionInfo]) -> List[str]:
490
- """Find candidates matching by suffix."""
491
- return [qn for qn in funcs if qn.endswith(f".{bare}")]
492
-
493
- def _select_same_class_candidate(
494
- self, candidates: List[str], caller: Optional[FunctionInfo], is_self_call: bool
495
- ) -> Optional[str]:
496
- """Select candidate from same class if applicable."""
497
- if not candidates or not (is_self_call or (caller and caller.class_name)):
498
- return None
499
-
500
- same_class = [
501
- qn for qn in candidates
502
- if caller and caller.class_name and f".{caller.class_name}." in qn
503
- ]
504
- if len(same_class) == 1:
505
- return same_class[0]
506
- return None
@@ -0,0 +1,91 @@
1
+ """Pipeline Resolver — callee resolution for pipeline detection.
2
+
3
+ Handles resolution of function calls to qualified names,
4
+ including self.method resolution within the same class.
5
+ """
6
+
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+ from code2llm.core.models import FunctionInfo
10
+
11
+
12
+ class PipelineResolver:
13
+ """Resolves callee names to qualified function names."""
14
+
15
+ def resolve(
16
+ self,
17
+ callee: str,
18
+ funcs: Dict[str, FunctionInfo],
19
+ caller: Optional[FunctionInfo] = None,
20
+ ) -> Optional[str]:
21
+ """Resolve callee name to qualified name.
22
+
23
+ Handles:
24
+ - Direct qualified matches
25
+ - self.method → same-class method resolution
26
+ - Unqualified names with same-class preference
27
+
28
+ Returns None for ambiguous matches (multiple candidates)
29
+ to avoid creating phantom pipeline edges.
30
+ """
31
+ # Direct match
32
+ if callee in funcs:
33
+ return callee
34
+
35
+ bare, is_self_call = self._strip_self_prefix(callee)
36
+
37
+ # Try same-class resolution first
38
+ if result := self._try_same_class_resolution(bare, caller, funcs):
39
+ return result
40
+
41
+ # Suffix match
42
+ candidates = self._get_suffix_candidates(bare, funcs)
43
+ if len(candidates) == 1:
44
+ return candidates[0]
45
+
46
+ # Prefer same-class candidates for method calls
47
+ return self._select_same_class_candidate(candidates, caller, is_self_call)
48
+
49
+ def _strip_self_prefix(self, callee: str) -> Tuple[str, bool]:
50
+ """Strip self. prefix and return bare name + flag."""
51
+ if callee.startswith("self."):
52
+ return callee[5:], True
53
+ return callee, False
54
+
55
+ def _try_same_class_resolution(
56
+ self,
57
+ bare: str,
58
+ caller: Optional[FunctionInfo],
59
+ funcs: Dict[str, FunctionInfo],
60
+ ) -> Optional[str]:
61
+ """Try to resolve method in the same class as caller."""
62
+ if caller and caller.class_name:
63
+ class_prefix = f"{caller.module}.{caller.class_name}."
64
+ class_candidate = class_prefix + bare
65
+ if class_candidate in funcs:
66
+ return class_candidate
67
+ return None
68
+
69
+ def _get_suffix_candidates(
70
+ self, bare: str, funcs: Dict[str, FunctionInfo]
71
+ ) -> List[str]:
72
+ """Find candidates matching by suffix."""
73
+ return [qn for qn in funcs if qn.endswith(f".{bare}")]
74
+
75
+ def _select_same_class_candidate(
76
+ self,
77
+ candidates: List[str],
78
+ caller: Optional[FunctionInfo],
79
+ is_self_call: bool,
80
+ ) -> Optional[str]:
81
+ """Select candidate from same class if applicable."""
82
+ if not candidates or not (is_self_call or (caller and caller.class_name)):
83
+ return None
84
+
85
+ same_class = [
86
+ qn for qn in candidates
87
+ if caller and caller.class_name and f".{caller.class_name}." in qn
88
+ ]
89
+ if len(same_class) == 1:
90
+ return same_class[0]
91
+ return None