code2llm 0.5.119__tar.gz → 0.5.121__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code2llm-0.5.119 → code2llm-0.5.121}/PKG-INFO +2 -2
- {code2llm-0.5.119 → code2llm-0.5.121}/README.md +1 -1
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/__init__.py +1 -1
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/__init__.py +4 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/call_graph.py +1 -4
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/cfg.py +1 -4
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/data_analysis.py +5 -0
- code2llm-0.5.121/code2llm/analysis/pipeline_classifier.py +100 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/pipeline_detector.py +29 -173
- code2llm-0.5.121/code2llm/analysis/pipeline_resolver.py +91 -0
- code2llm-0.5.121/code2llm/exporters/dashboard_data.py +163 -0
- code2llm-0.5.119/code2llm/exporters/html_dashboard.py → code2llm-0.5.121/code2llm/exporters/dashboard_renderer.py +47 -209
- code2llm-0.5.121/code2llm/exporters/html_dashboard.py +68 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/map_exporter.py +9 -12
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon/helpers.py +2 -16
- code2llm-0.5.121/code2llm/exporters/toon/metrics.py +98 -0
- code2llm-0.5.119/code2llm/exporters/toon/metrics.py → code2llm-0.5.121/code2llm/exporters/toon/metrics_core.py +41 -237
- code2llm-0.5.121/code2llm/exporters/toon/metrics_duplicates.py +78 -0
- code2llm-0.5.121/code2llm/exporters/toon/metrics_health.py +98 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/llm_flow.py +11 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/mermaid.py +77 -79
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/__init__.py +1 -1
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/PKG-INFO +2 -2
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/SOURCES.txt +7 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/pyproject.toml +1 -1
- {code2llm-0.5.119 → code2llm-0.5.121}/setup.py +1 -1
- {code2llm-0.5.119 → code2llm-0.5.121}/LICENSE +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/__main__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/coupling.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/dfg.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/side_effects.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/smells.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/type_inference.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/utils/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/utils/ast_helpers.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/api.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_analysis.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_commands.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/code2logic.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/formats.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/orchestrator.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_exports/prompt.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/cli_parser.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/analyzer.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/ast_registry.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/config.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/export_pipeline.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/file_analyzer.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/file_cache.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/file_filter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/gitignore.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/incremental.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/base.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/cpp.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/csharp.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/generic.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/go_lang.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/java.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/php.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/ruby.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/rust.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/ts_extractors.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/ts_parser.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/lang/typescript.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/large_repo.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/models.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/persistent_cache.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/refactoring.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/repo_files.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/cache.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/incremental.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/prioritizer.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/scanner.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming/strategies.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/streaming_analyzer.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/core/toon_size_manager.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/article_view.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/base.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/context_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/context_view.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/evolution_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/flow_constants.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/flow_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/flow_renderer.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/index_generator/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/index_generator/renderer.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/index_generator/scanner.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/index_generator.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/json_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/llm_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/mermaid_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/mermaid_flow_helpers.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/constants.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/core.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/evolution.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/health.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/hotspots.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml/modules.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/project_yaml_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/readme_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/report_generators.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon/module_detail.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon/renderer.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/toon_view.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/validate_project.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/exporters/yaml_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/_utils.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/generators/llm_task.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/config.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/entity_resolution.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/intent_matching.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/normalization.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/nlp/pipeline.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/patterns/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/patterns/detector.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/refactor/__init__.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm/refactor/prompt_engine.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/dependency_links.txt +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/entry_points.txt +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/requires.txt +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/code2llm.egg-info/top_level.txt +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/setup.cfg +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_advanced_analysis.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_analyzer.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_calls_toon_export.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_deep_analysis.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_edge_cases.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_flow_exporter.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_format_quality.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_multilanguage_e2e.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_nlp_pipeline.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_nonpython_cc_calls.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_persistent_cache.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_pipeline_detector.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_project_toon_export.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_prompt_engine.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_prompt_txt.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_refactoring_engine.py +0 -0
- {code2llm-0.5.119 → code2llm-0.5.121}/tests/test_toon_v2.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code2llm
|
|
3
|
-
Version: 0.5.119
|
|
3
|
+
Version: 0.5.121
|
|
4
4
|
Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
|
|
5
5
|
Home-page: https://github.com/wronai/stts
|
|
6
6
|
Author: STTS Project
|
|
@@ -67,7 +67,7 @@ Dynamic: requires-python
|
|
|
67
67
|
|
|
68
68
|
## AI Cost Tracking
|
|
69
69
|
|
|
70
|
-
    
|
|
71
71
|
  
|
|
72
72
|
|
|
73
73
|
- 🤖 **LLM usage:** $7.5000 (166 commits)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
## AI Cost Tracking
|
|
5
5
|
|
|
6
|
-
    
|
|
7
7
|
  
|
|
8
8
|
|
|
9
9
|
- 🤖 **LLM usage:** $7.5000 (166 commits)
|
|
@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
|
|
|
8
8
|
and entity resolution with multilingual support.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
__version__ = "0.5.119"
|
|
11
|
+
__version__ = "0.5.121"
|
|
12
12
|
__author__ = "STTS Project"
|
|
13
13
|
|
|
14
14
|
# Core analysis components (lightweight, always needed)
|
|
@@ -10,6 +10,8 @@ __all__ = [
|
|
|
10
10
|
'TypeInferenceEngine',
|
|
11
11
|
'SideEffectDetector',
|
|
12
12
|
'PipelineDetector',
|
|
13
|
+
'PipelineResolver',
|
|
14
|
+
'PipelineClassifier',
|
|
13
15
|
]
|
|
14
16
|
|
|
15
17
|
|
|
@@ -25,6 +27,8 @@ def __getattr__(name):
|
|
|
25
27
|
'TypeInferenceEngine': '.type_inference',
|
|
26
28
|
'SideEffectDetector': '.side_effects',
|
|
27
29
|
'PipelineDetector': '.pipeline_detector',
|
|
30
|
+
'PipelineResolver': '.pipeline_resolver',
|
|
31
|
+
'PipelineClassifier': '.pipeline_classifier',
|
|
28
32
|
}
|
|
29
33
|
if name in _imports:
|
|
30
34
|
import importlib
|
|
@@ -97,7 +97,7 @@ class CallGraphExtractor(ast.NodeVisitor):
|
|
|
97
97
|
|
|
98
98
|
def visit_FunctionDef(self, node: ast.FunctionDef):
|
|
99
99
|
"""Visit function definition and track calls within it."""
|
|
100
|
-
func_name = self._qualified_name(node.name)
|
|
100
|
+
func_name = qualified_name(self.module_name, self.class_stack, node.name)
|
|
101
101
|
self.function_stack.append(func_name)
|
|
102
102
|
|
|
103
103
|
# Visit body to find calls
|
|
@@ -139,9 +139,6 @@ class CallGraphExtractor(ast.NodeVisitor):
|
|
|
139
139
|
|
|
140
140
|
self.generic_visit(node)
|
|
141
141
|
|
|
142
|
-
def _qualified_name(self, name: str) -> str:
|
|
143
|
-
return qualified_name(self.module_name, self.class_stack, name)
|
|
144
|
-
|
|
145
142
|
def _resolve_call(self, node: ast.AST) -> Optional[str]:
|
|
146
143
|
"""Resolve a call to its full name."""
|
|
147
144
|
if isinstance(node, ast.Name):
|
|
@@ -69,7 +69,7 @@ class CFGExtractor(ast.NodeVisitor):
|
|
|
69
69
|
|
|
70
70
|
def visit_FunctionDef(self, node: ast.FunctionDef):
|
|
71
71
|
"""Visit function definition."""
|
|
72
|
-
func_name = self._qualified_name(node.name)
|
|
72
|
+
func_name = qualified_name(self.module_name, self.class_stack, node.name)
|
|
73
73
|
self.function_stack.append(func_name)
|
|
74
74
|
|
|
75
75
|
# Create entry node
|
|
@@ -260,9 +260,6 @@ class CFGExtractor(ast.NodeVisitor):
|
|
|
260
260
|
else:
|
|
261
261
|
self.generic_visit(node)
|
|
262
262
|
|
|
263
|
-
def _qualified_name(self, name: str) -> str:
|
|
264
|
-
return qualified_name(self.module_name, self.class_stack, name)
|
|
265
|
-
|
|
266
263
|
def _extract_condition(self, node: ast.AST) -> str:
|
|
267
264
|
"""Extract condition as string."""
|
|
268
265
|
try:
|
|
@@ -206,6 +206,7 @@ class DataAnalyzer:
|
|
|
206
206
|
return sorted(data_types.values(), key=lambda x: x['usage_count'], reverse=True)
|
|
207
207
|
|
|
208
208
|
def _infer_parameter_types(self, func) -> list:
|
|
209
|
+
"""Infer parameter types from function name patterns."""
|
|
209
210
|
params = []
|
|
210
211
|
name = func.name.lower()
|
|
211
212
|
if 'list' in name or 'items' in name: params.append('list')
|
|
@@ -215,6 +216,7 @@ class DataAnalyzer:
|
|
|
215
216
|
return params
|
|
216
217
|
|
|
217
218
|
def _infer_return_types(self, func) -> list:
|
|
219
|
+
"""Infer return types from function name patterns."""
|
|
218
220
|
returns = []
|
|
219
221
|
name = func.name.lower()
|
|
220
222
|
if name.startswith(('get_', 'find_')): returns.append('dict')
|
|
@@ -224,6 +226,7 @@ class DataAnalyzer:
|
|
|
224
226
|
return returns
|
|
225
227
|
|
|
226
228
|
def _build_data_flow_graph(self, result: AnalysisResult) -> dict:
|
|
229
|
+
"""Build data flow graph from function relationships."""
|
|
227
230
|
nodes = {}
|
|
228
231
|
edges = []
|
|
229
232
|
for func_name, func in result.functions.items():
|
|
@@ -255,6 +258,7 @@ class DataAnalyzer:
|
|
|
255
258
|
return list(set(types))
|
|
256
259
|
|
|
257
260
|
def _identify_process_patterns(self, result: AnalysisResult) -> list:
|
|
261
|
+
"""Identify common data processing patterns (filter, map, reduce, etc.)."""
|
|
258
262
|
patterns = {'filter': [], 'map': [], 'reduce': [], 'aggregate': [], 'transform': [], 'validate': []}
|
|
259
263
|
indicators = {
|
|
260
264
|
'filter': ['filter', 'select', 'where', 'find'], 'map': ['map', 'transform', 'process'],
|
|
@@ -274,6 +278,7 @@ class DataAnalyzer:
|
|
|
274
278
|
return sorted(res, key=lambda x: x['count'], reverse=True)
|
|
275
279
|
|
|
276
280
|
def _analyze_optimization_opportunities(self, result: AnalysisResult, data_types: list, dfg: dict) -> dict:
|
|
281
|
+
"""Analyze optimization opportunities in data handling."""
|
|
277
282
|
opt = {'potential_score': 0.0, 'type_consolidation': [], 'process_consolidation': [], 'hub_optimization': [], 'recommendations': []}
|
|
278
283
|
similar = {}
|
|
279
284
|
for dt in data_types:
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Pipeline Classifier — domain classification and naming for pipelines.
|
|
2
|
+
|
|
3
|
+
Groups pipelines by module domain (NLP, Analysis, Export, Refactor, etc.)
|
|
4
|
+
and derives human-readable pipeline names.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from collections import defaultdict
|
|
8
|
+
from typing import Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
from code2llm.core.models import FunctionInfo
|
|
11
|
+
from code2llm.analysis.type_inference import TypeInferenceEngine
|
|
12
|
+
|
|
13
|
+
# Module-to-domain mapping heuristics
|
|
14
|
+
DOMAIN_KEYWORDS: Dict[str, List[str]] = {
|
|
15
|
+
"NLP": ["nlp", "natural", "language", "intent", "entity",
|
|
16
|
+
"query", "normalize", "tokenize", "match"],
|
|
17
|
+
"Analysis": ["analysis", "analyzer", "analyse", "analyze",
|
|
18
|
+
"metric", "complexity", "cfg", "dfg", "call_graph"],
|
|
19
|
+
"Export": ["export", "exporter", "render", "format", "output",
|
|
20
|
+
"toon", "mermaid", "json_export", "yaml_export"],
|
|
21
|
+
"Refactor": ["refactor", "smell", "suggest", "fix", "patch",
|
|
22
|
+
"template", "prompt", "engine"],
|
|
23
|
+
"Core": ["core", "config", "model", "base", "util", "helper"],
|
|
24
|
+
"IO": ["io", "file", "path", "read", "write", "load", "save",
|
|
25
|
+
"cache", "storage"],
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PipelineClassifier:
|
|
30
|
+
"""Classify pipelines by domain and derive human-readable names."""
|
|
31
|
+
|
|
32
|
+
def __init__(self, type_engine: Optional[TypeInferenceEngine] = None):
|
|
33
|
+
self._type_engine = type_engine or TypeInferenceEngine()
|
|
34
|
+
|
|
35
|
+
def classify_domain(
|
|
36
|
+
self, path: List[str], funcs: Dict[str, FunctionInfo]
|
|
37
|
+
) -> str:
|
|
38
|
+
"""Classify pipeline domain by analyzing module names and function names."""
|
|
39
|
+
scores: Dict[str, int] = defaultdict(int)
|
|
40
|
+
|
|
41
|
+
for qname in path:
|
|
42
|
+
fi = funcs.get(qname)
|
|
43
|
+
if not fi:
|
|
44
|
+
continue
|
|
45
|
+
text = f"{fi.module} {fi.name}".lower()
|
|
46
|
+
for domain, keywords in DOMAIN_KEYWORDS.items():
|
|
47
|
+
for kw in keywords:
|
|
48
|
+
if kw in text:
|
|
49
|
+
scores[domain] += 1
|
|
50
|
+
|
|
51
|
+
if scores:
|
|
52
|
+
return max(scores, key=scores.get)
|
|
53
|
+
return "Unknown"
|
|
54
|
+
|
|
55
|
+
def derive_pipeline_name(
|
|
56
|
+
self,
|
|
57
|
+
path: List[str],
|
|
58
|
+
funcs: Dict[str, FunctionInfo],
|
|
59
|
+
domain: str,
|
|
60
|
+
) -> str:
|
|
61
|
+
"""Derive a human-readable pipeline name."""
|
|
62
|
+
# Use the dominant sub-module name
|
|
63
|
+
module_counts: Dict[str, int] = defaultdict(int)
|
|
64
|
+
for qname in path:
|
|
65
|
+
fi = funcs.get(qname)
|
|
66
|
+
if fi:
|
|
67
|
+
parts = fi.module.split(".")
|
|
68
|
+
# Use most specific module component
|
|
69
|
+
for part in parts:
|
|
70
|
+
if part and part not in ("code2llm", "__init__"):
|
|
71
|
+
module_counts[part] += 1
|
|
72
|
+
|
|
73
|
+
if module_counts:
|
|
74
|
+
dominant = max(module_counts, key=module_counts.get)
|
|
75
|
+
# Capitalize and use domain if module name is generic
|
|
76
|
+
if dominant in ("core", "base", "utils", "helpers"):
|
|
77
|
+
return domain
|
|
78
|
+
return dominant.capitalize()
|
|
79
|
+
|
|
80
|
+
return domain
|
|
81
|
+
|
|
82
|
+
def get_entry_type(self, fi: Optional[FunctionInfo]) -> str:
|
|
83
|
+
"""Get the input type of a pipeline's entry point."""
|
|
84
|
+
if not fi:
|
|
85
|
+
return "?"
|
|
86
|
+
args = self._type_engine.get_arg_types(fi)
|
|
87
|
+
for arg in args:
|
|
88
|
+
if arg["name"] == "self":
|
|
89
|
+
continue
|
|
90
|
+
if arg.get("type"):
|
|
91
|
+
return arg["type"]
|
|
92
|
+
return arg["name"]
|
|
93
|
+
return "?"
|
|
94
|
+
|
|
95
|
+
def get_exit_type(self, fi: Optional[FunctionInfo]) -> str:
|
|
96
|
+
"""Get the output type of a pipeline's exit point."""
|
|
97
|
+
if not fi:
|
|
98
|
+
return "?"
|
|
99
|
+
ret = self._type_engine.get_return_type(fi)
|
|
100
|
+
return ret if ret else "?"
|
|
@@ -8,18 +8,20 @@ Uses call graph analysis with networkx to:
|
|
|
8
8
|
- Aggregate purity per pipeline using SideEffectDetector
|
|
9
9
|
|
|
10
10
|
Sprint 3 (v0.3.2): Replaces the custom DFS chain-tracing in FlowExporter.
|
|
11
|
+
Refactored v0.5.x: Extracted resolver and classifier into separate modules.
|
|
11
12
|
"""
|
|
12
13
|
|
|
13
14
|
import logging
|
|
14
|
-
from collections import defaultdict
|
|
15
15
|
from dataclasses import dataclass, field
|
|
16
|
-
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
16
|
+
from typing import Any, Dict, List, Optional, Set
|
|
17
17
|
|
|
18
18
|
import networkx as nx
|
|
19
19
|
|
|
20
|
-
from code2llm.core.models import
|
|
20
|
+
from code2llm.core.models import FunctionInfo
|
|
21
21
|
from .side_effects import SideEffectDetector, SideEffectInfo
|
|
22
22
|
from .type_inference import TypeInferenceEngine
|
|
23
|
+
from .pipeline_resolver import PipelineResolver
|
|
24
|
+
from .pipeline_classifier import PipelineClassifier, DOMAIN_KEYWORDS
|
|
23
25
|
|
|
24
26
|
logger = logging.getLogger(__name__)
|
|
25
27
|
|
|
@@ -28,28 +30,6 @@ MIN_PIPELINE_LENGTH = 3
|
|
|
28
30
|
MAX_PIPELINES = 12
|
|
29
31
|
CC_HIGH = 15
|
|
30
32
|
|
|
31
|
-
# Patterns to exclude from analysis
|
|
32
|
-
EXCLUDE_PATTERNS = frozenset({
|
|
33
|
-
'venv', '.venv', 'env', '.env', 'publish-env', 'test-env',
|
|
34
|
-
'site-packages', 'node_modules', '__pycache__', '.git',
|
|
35
|
-
'dist', 'build', 'egg-info', '.tox', '.mypy_cache',
|
|
36
|
-
})
|
|
37
|
-
|
|
38
|
-
# Module-to-domain mapping heuristics
|
|
39
|
-
DOMAIN_KEYWORDS: Dict[str, List[str]] = {
|
|
40
|
-
"NLP": ["nlp", "natural", "language", "intent", "entity",
|
|
41
|
-
"query", "normalize", "tokenize", "match"],
|
|
42
|
-
"Analysis": ["analysis", "analyzer", "analyse", "analyze",
|
|
43
|
-
"metric", "complexity", "cfg", "dfg", "call_graph"],
|
|
44
|
-
"Export": ["export", "exporter", "render", "format", "output",
|
|
45
|
-
"toon", "mermaid", "json_export", "yaml_export"],
|
|
46
|
-
"Refactor": ["refactor", "smell", "suggest", "fix", "patch",
|
|
47
|
-
"template", "prompt", "engine"],
|
|
48
|
-
"Core": ["core", "config", "model", "base", "util", "helper"],
|
|
49
|
-
"IO": ["io", "file", "path", "read", "write", "load", "save",
|
|
50
|
-
"cache", "storage"],
|
|
51
|
-
}
|
|
52
|
-
|
|
53
33
|
|
|
54
34
|
@dataclass
|
|
55
35
|
class PipelineStage:
|
|
@@ -112,11 +92,28 @@ class Pipeline:
|
|
|
112
92
|
}
|
|
113
93
|
|
|
114
94
|
|
|
95
|
+
# Re-export for backward compatibility
|
|
96
|
+
__all__ = [
|
|
97
|
+
'PipelineDetector',
|
|
98
|
+
'Pipeline',
|
|
99
|
+
'PipelineStage',
|
|
100
|
+
'PipelineResolver',
|
|
101
|
+
'PipelineClassifier',
|
|
102
|
+
'DOMAIN_KEYWORDS',
|
|
103
|
+
'MIN_PIPELINE_LENGTH',
|
|
104
|
+
'MAX_PIPELINES',
|
|
105
|
+
'CC_HIGH',
|
|
106
|
+
]
|
|
107
|
+
|
|
108
|
+
|
|
115
109
|
class PipelineDetector:
|
|
116
110
|
"""Detect pipelines in a codebase using networkx graph analysis.
|
|
117
111
|
|
|
118
112
|
Builds a call graph as a DiGraph, finds longest paths as pipeline
|
|
119
113
|
candidates, groups by module domain, and labels entry/exit points.
|
|
114
|
+
|
|
115
|
+
Refactored to delegate resolution and classification to specialized
|
|
116
|
+
helper classes: PipelineResolver and PipelineClassifier.
|
|
120
117
|
"""
|
|
121
118
|
|
|
122
119
|
def __init__(
|
|
@@ -126,6 +123,8 @@ class PipelineDetector:
|
|
|
126
123
|
):
|
|
127
124
|
self._type_engine = type_engine or TypeInferenceEngine()
|
|
128
125
|
self._se_detector = side_effect_detector or SideEffectDetector()
|
|
126
|
+
self._resolver = PipelineResolver()
|
|
127
|
+
self._classifier = PipelineClassifier(self._type_engine)
|
|
129
128
|
|
|
130
129
|
def detect(
|
|
131
130
|
self,
|
|
@@ -177,7 +176,7 @@ class PipelineDetector:
|
|
|
177
176
|
|
|
178
177
|
for qname, fi in funcs.items():
|
|
179
178
|
for callee in fi.calls:
|
|
180
|
-
resolved = self._resolve_callee(callee, funcs, caller=fi)
|
|
179
|
+
resolved = self._resolver.resolve(callee, funcs, caller=fi)
|
|
181
180
|
if resolved and resolved != qname: # no self-loops
|
|
182
181
|
G.add_edge(qname, resolved)
|
|
183
182
|
|
|
@@ -303,8 +302,8 @@ class PipelineDetector:
|
|
|
303
302
|
if not stages:
|
|
304
303
|
continue
|
|
305
304
|
|
|
306
|
-
domain = self._classify_domain(path, funcs)
|
|
307
|
-
name = self._derive_pipeline_name(path, funcs, domain)
|
|
305
|
+
domain = self._classifier.classify_domain(path, funcs)
|
|
306
|
+
name = self._classifier.derive_pipeline_name(path, funcs, domain)
|
|
308
307
|
|
|
309
308
|
# Entry/exit labeling
|
|
310
309
|
stages[0].is_entry = True
|
|
@@ -315,8 +314,8 @@ class PipelineDetector:
|
|
|
315
314
|
bottleneck = max(stages, key=lambda s: s.cc) if stages else None
|
|
316
315
|
|
|
317
316
|
# Entry/exit types
|
|
318
|
-
entry_type = self._get_entry_type(funcs.get(path[0]))
|
|
319
|
-
exit_type = self._get_exit_type(funcs.get(path[-1]))
|
|
317
|
+
entry_type = self._classifier.get_entry_type(funcs.get(path[0]))
|
|
318
|
+
exit_type = self._classifier.get_exit_type(funcs.get(path[-1]))
|
|
320
319
|
|
|
321
320
|
pipeline = Pipeline(
|
|
322
321
|
name=name,
|
|
@@ -361,146 +360,3 @@ class PipelineDetector:
|
|
|
361
360
|
side_effect_summary=se_summary,
|
|
362
361
|
))
|
|
363
362
|
return stages
|
|
364
|
-
|
|
365
|
-
# ------------------------------------------------------------------
|
|
366
|
-
# domain classification
|
|
367
|
-
# ------------------------------------------------------------------
|
|
368
|
-
def _classify_domain(
|
|
369
|
-
self, path: List[str], funcs: Dict[str, FunctionInfo]
|
|
370
|
-
) -> str:
|
|
371
|
-
"""Classify pipeline domain by analyzing module names and function names."""
|
|
372
|
-
scores: Dict[str, int] = defaultdict(int)
|
|
373
|
-
|
|
374
|
-
for qname in path:
|
|
375
|
-
fi = funcs.get(qname)
|
|
376
|
-
if not fi:
|
|
377
|
-
continue
|
|
378
|
-
text = f"{fi.module} {fi.name}".lower()
|
|
379
|
-
for domain, keywords in DOMAIN_KEYWORDS.items():
|
|
380
|
-
for kw in keywords:
|
|
381
|
-
if kw in text:
|
|
382
|
-
scores[domain] += 1
|
|
383
|
-
|
|
384
|
-
if scores:
|
|
385
|
-
return max(scores, key=scores.get)
|
|
386
|
-
return "Unknown"
|
|
387
|
-
|
|
388
|
-
def _derive_pipeline_name(
|
|
389
|
-
self, path: List[str],
|
|
390
|
-
funcs: Dict[str, FunctionInfo],
|
|
391
|
-
domain: str,
|
|
392
|
-
) -> str:
|
|
393
|
-
"""Derive a human-readable pipeline name."""
|
|
394
|
-
# Use the dominant sub-module name
|
|
395
|
-
module_counts: Dict[str, int] = defaultdict(int)
|
|
396
|
-
for qname in path:
|
|
397
|
-
fi = funcs.get(qname)
|
|
398
|
-
if fi:
|
|
399
|
-
parts = fi.module.split(".")
|
|
400
|
-
# Use most specific module component
|
|
401
|
-
for part in parts:
|
|
402
|
-
if part and part not in ("code2llm", "__init__"):
|
|
403
|
-
module_counts[part] += 1
|
|
404
|
-
|
|
405
|
-
if module_counts:
|
|
406
|
-
dominant = max(module_counts, key=module_counts.get)
|
|
407
|
-
# Capitalize and use domain if module name is generic
|
|
408
|
-
if dominant in ("core", "base", "utils", "helpers"):
|
|
409
|
-
return domain
|
|
410
|
-
return dominant.capitalize()
|
|
411
|
-
|
|
412
|
-
return domain
|
|
413
|
-
|
|
414
|
-
# ------------------------------------------------------------------
|
|
415
|
-
# type helpers
|
|
416
|
-
# ------------------------------------------------------------------
|
|
417
|
-
def _get_entry_type(self, fi: Optional[FunctionInfo]) -> str:
|
|
418
|
-
"""Get the input type of a pipeline's entry point."""
|
|
419
|
-
if not fi:
|
|
420
|
-
return "?"
|
|
421
|
-
args = self._type_engine.get_arg_types(fi)
|
|
422
|
-
for arg in args:
|
|
423
|
-
if arg["name"] == "self":
|
|
424
|
-
continue
|
|
425
|
-
if arg.get("type"):
|
|
426
|
-
return arg["type"]
|
|
427
|
-
return arg["name"]
|
|
428
|
-
return "?"
|
|
429
|
-
|
|
430
|
-
def _get_exit_type(self, fi: Optional[FunctionInfo]) -> str:
|
|
431
|
-
"""Get the output type of a pipeline's exit point."""
|
|
432
|
-
if not fi:
|
|
433
|
-
return "?"
|
|
434
|
-
ret = self._type_engine.get_return_type(fi)
|
|
435
|
-
return ret if ret else "?"
|
|
436
|
-
|
|
437
|
-
# ------------------------------------------------------------------
|
|
438
|
-
# callee resolution
|
|
439
|
-
# ------------------------------------------------------------------
|
|
440
|
-
def _resolve_callee(
|
|
441
|
-
self, callee: str, funcs: Dict[str, FunctionInfo],
|
|
442
|
-
caller: Optional[FunctionInfo] = None,
|
|
443
|
-
) -> Optional[str]:
|
|
444
|
-
"""Resolve callee name to qualified name.
|
|
445
|
-
|
|
446
|
-
Handles:
|
|
447
|
-
- Direct qualified matches
|
|
448
|
-
- self.method → same-class method resolution
|
|
449
|
-
- Unqualified names with same-class preference
|
|
450
|
-
|
|
451
|
-
Returns None for ambiguous matches (multiple candidates)
|
|
452
|
-
to avoid creating phantom pipeline edges.
|
|
453
|
-
"""
|
|
454
|
-
# Direct match
|
|
455
|
-
if callee in funcs:
|
|
456
|
-
return callee
|
|
457
|
-
|
|
458
|
-
bare, is_self_call = self._strip_self_prefix(callee)
|
|
459
|
-
|
|
460
|
-
# Try same-class resolution first
|
|
461
|
-
if result := self._try_same_class_resolution(bare, caller, funcs):
|
|
462
|
-
return result
|
|
463
|
-
|
|
464
|
-
# Suffix match
|
|
465
|
-
candidates = self._get_suffix_candidates(bare, funcs)
|
|
466
|
-
if len(candidates) == 1:
|
|
467
|
-
return candidates[0]
|
|
468
|
-
|
|
469
|
-
# Prefer same-class candidates for method calls
|
|
470
|
-
return self._select_same_class_candidate(candidates, caller, is_self_call)
|
|
471
|
-
|
|
472
|
-
def _strip_self_prefix(self, callee: str) -> Tuple[str, bool]:
|
|
473
|
-
"""Strip self. prefix and return bare name + flag."""
|
|
474
|
-
if callee.startswith("self."):
|
|
475
|
-
return callee[5:], True
|
|
476
|
-
return callee, False
|
|
477
|
-
|
|
478
|
-
def _try_same_class_resolution(
|
|
479
|
-
self, bare: str, caller: Optional[FunctionInfo], funcs: Dict[str, FunctionInfo]
|
|
480
|
-
) -> Optional[str]:
|
|
481
|
-
"""Try to resolve method in the same class as caller."""
|
|
482
|
-
if caller and caller.class_name:
|
|
483
|
-
class_prefix = f"{caller.module}.{caller.class_name}."
|
|
484
|
-
class_candidate = class_prefix + bare
|
|
485
|
-
if class_candidate in funcs:
|
|
486
|
-
return class_candidate
|
|
487
|
-
return None
|
|
488
|
-
|
|
489
|
-
def _get_suffix_candidates(self, bare: str, funcs: Dict[str, FunctionInfo]) -> List[str]:
|
|
490
|
-
"""Find candidates matching by suffix."""
|
|
491
|
-
return [qn for qn in funcs if qn.endswith(f".{bare}")]
|
|
492
|
-
|
|
493
|
-
def _select_same_class_candidate(
|
|
494
|
-
self, candidates: List[str], caller: Optional[FunctionInfo], is_self_call: bool
|
|
495
|
-
) -> Optional[str]:
|
|
496
|
-
"""Select candidate from same class if applicable."""
|
|
497
|
-
if not candidates or not (is_self_call or (caller and caller.class_name)):
|
|
498
|
-
return None
|
|
499
|
-
|
|
500
|
-
same_class = [
|
|
501
|
-
qn for qn in candidates
|
|
502
|
-
if caller and caller.class_name and f".{caller.class_name}." in qn
|
|
503
|
-
]
|
|
504
|
-
if len(same_class) == 1:
|
|
505
|
-
return same_class[0]
|
|
506
|
-
return None
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Pipeline Resolver — callee resolution for pipeline detection.
|
|
2
|
+
|
|
3
|
+
Handles resolution of function calls to qualified names,
|
|
4
|
+
including self.method resolution within the same class.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, List, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
from code2llm.core.models import FunctionInfo
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PipelineResolver:
    """Map raw callee strings onto qualified names of known functions."""

    def resolve(
        self,
        callee: str,
        funcs: Dict[str, FunctionInfo],
        caller: Optional[FunctionInfo] = None,
    ) -> Optional[str]:
        """Return the key of ``funcs`` that ``callee`` refers to, if any.

        Lookup order:
        1. ``callee`` is itself a key of ``funcs``.
        2. With a leading ``self.`` dropped, the name exists as
           ``<caller.module>.<caller.class_name>.<name>``.
        3. Exactly one key ends with ``.<name>``.
        4. Several keys end with ``.<name>`` but exactly one of them
           contains ``.<caller.class_name>.``.

        Every other outcome is treated as unknown or ambiguous and gives
        None, so no phantom pipeline edge is created.
        """
        if callee in funcs:
            return callee

        name, via_self = self._strip_self_prefix(callee)

        in_class = self._try_same_class_resolution(name, caller, funcs)
        if in_class:
            return in_class

        matches = self._get_suffix_candidates(name, funcs)
        if len(matches) == 1:
            return matches[0]

        # Zero or several suffix matches: only a same-class hit can decide.
        return self._select_same_class_candidate(matches, caller, via_self)

    def _strip_self_prefix(self, callee: str) -> Tuple[str, bool]:
        """Split a callee into its name without ``self.`` and a self-call flag."""
        prefix = "self."
        has_prefix = callee.startswith(prefix)
        name = callee[len(prefix):] if has_prefix else callee
        return name, has_prefix

    def _try_same_class_resolution(
        self,
        bare: str,
        caller: Optional[FunctionInfo],
        funcs: Dict[str, FunctionInfo],
    ) -> Optional[str]:
        """Look up ``bare`` as a method of the caller's own class.

        Returns ``<caller.module>.<caller.class_name>.<bare>`` when that
        key exists in ``funcs``; None when the caller or its class name
        is missing, or the key is absent.
        """
        if not caller or not caller.class_name:
            return None

        qualified = f"{caller.module}.{caller.class_name}." + bare
        return qualified if qualified in funcs else None

    def _get_suffix_candidates(
        self, bare: str, funcs: Dict[str, FunctionInfo]
    ) -> List[str]:
        """Collect every key of ``funcs`` that ends with ``.<bare>``."""
        tail = f".{bare}"
        return [name for name in funcs if name.endswith(tail)]

    def _select_same_class_candidate(
        self,
        candidates: List[str],
        caller: Optional[FunctionInfo],
        is_self_call: bool,
    ) -> Optional[str]:
        """Pick the single candidate that mentions the caller's class.

        A candidate qualifies when it contains ``.<caller.class_name>.``.
        Returns it when exactly one qualifies; None when ``candidates``
        is empty, the caller has no class name, or zero or several
        candidates qualify.

        ``is_self_call`` does not change the outcome: without a caller
        class there is nothing to match against, so a self-call alone
        still yields None.
        """
        if not candidates:
            return None

        owner = caller.class_name if caller else None
        if not owner:
            return None

        marker = f".{owner}."
        hits = [qn for qn in candidates if marker in qn]
        return hits[0] if len(hits) == 1 else None
|