PyPI - code2llm - Versions diffs - 0.5.119__tar.gz → 0.5.121__tar.gz - Mend

code2llm 0.5.119.tar.gz → 0.5.121.tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (149) hide show

{code2llm-0.5.119 → code2llm-0.5.121}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code2llm
-Version: 0.5.119
+Version: 0.5.121
 Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
 Home-page: https://github.com/wronai/stts
 Author: STTS Project
@@ -67,7 +67,7 @@ Dynamic: requires-python
 ## AI Cost Tracking
-![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.119-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
+![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.121-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
 ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-57.3h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
 - 🤖 **LLM usage:** $7.5000 (166 commits)

{code2llm-0.5.119 → code2llm-0.5.121}/README.md RENAMED Viewed

@@ -3,7 +3,7 @@
 ## AI Cost Tracking
-![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.119-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
+![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.121-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
 ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-57.3h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
 - 🤖 **LLM usage:** $7.5000 (166 commits)

{code2llm-0.5.119 → code2llm-0.5.121}/code2llm/__init__.py RENAMED Viewed

@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
 and entity resolution with multilingual support.
 """
-__version__ = "0.5.119"
+__version__ = "0.5.121"
 __author__ = "STTS Project"
 # Core analysis components (lightweight, always needed)

{code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/__init__.py RENAMED Viewed

@@ -10,6 +10,8 @@ __all__ = [
     'TypeInferenceEngine',
     'SideEffectDetector',
     'PipelineDetector',
+    'PipelineResolver',
+    'PipelineClassifier',
 ]
@@ -25,6 +27,8 @@ def __getattr__(name):
         'TypeInferenceEngine': '.type_inference',
         'SideEffectDetector': '.side_effects',
         'PipelineDetector': '.pipeline_detector',
+        'PipelineResolver': '.pipeline_resolver',
+        'PipelineClassifier': '.pipeline_classifier',
     }
     if name in _imports:
         import importlib

{code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/call_graph.py RENAMED Viewed

@@ -97,7 +97,7 @@ class CallGraphExtractor(ast.NodeVisitor):
     def visit_FunctionDef(self, node: ast.FunctionDef):
         """Visit function definition and track calls within it."""
-        func_name = self._qualified_name(node.name)
+        func_name = qualified_name(self.module_name, self.class_stack, node.name)
         self.function_stack.append(func_name)
         # Visit body to find calls
@@ -139,9 +139,6 @@ class CallGraphExtractor(ast.NodeVisitor):
         self.generic_visit(node)
-    def _qualified_name(self, name: str) -> str:
-        return qualified_name(self.module_name, self.class_stack, name)
     def _resolve_call(self, node: ast.AST) -> Optional[str]:
         """Resolve a call to its full name."""
         if isinstance(node, ast.Name):

{code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/cfg.py RENAMED Viewed

@@ -69,7 +69,7 @@ class CFGExtractor(ast.NodeVisitor):
     def visit_FunctionDef(self, node: ast.FunctionDef):
         """Visit function definition."""
-        func_name = self._qualified_name(node.name)
+        func_name = qualified_name(self.module_name, self.class_stack, node.name)
         self.function_stack.append(func_name)
         # Create entry node
@@ -260,9 +260,6 @@ class CFGExtractor(ast.NodeVisitor):
         else:
             self.generic_visit(node)
-    def _qualified_name(self, name: str) -> str:
-        return qualified_name(self.module_name, self.class_stack, name)
     def _extract_condition(self, node: ast.AST) -> str:
         """Extract condition as string."""
         try:

{code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/data_analysis.py RENAMED Viewed

@@ -206,6 +206,7 @@ class DataAnalyzer:
         return sorted(data_types.values(), key=lambda x: x['usage_count'], reverse=True)
     def _infer_parameter_types(self, func) -> list:
+        """Infer parameter types from function name patterns."""
         params = []
         name = func.name.lower()
         if 'list' in name or 'items' in name: params.append('list')
@@ -215,6 +216,7 @@ class DataAnalyzer:
         return params
     def _infer_return_types(self, func) -> list:
+        """Infer return types from function name patterns."""
         returns = []
         name = func.name.lower()
         if name.startswith(('get_', 'find_')): returns.append('dict')
@@ -224,6 +226,7 @@ class DataAnalyzer:
         return returns
     def _build_data_flow_graph(self, result: AnalysisResult) -> dict:
+        """Build data flow graph from function relationships."""
         nodes = {}
         edges = []
         for func_name, func in result.functions.items():
@@ -255,6 +258,7 @@ class DataAnalyzer:
         return list(set(types))
     def _identify_process_patterns(self, result: AnalysisResult) -> list:
+        """Identify common data processing patterns (filter, map, reduce, etc.)."""
         patterns = {'filter': [], 'map': [], 'reduce': [], 'aggregate': [], 'transform': [], 'validate': []}
         indicators = {
             'filter': ['filter', 'select', 'where', 'find'], 'map': ['map', 'transform', 'process'],
@@ -274,6 +278,7 @@ class DataAnalyzer:
         return sorted(res, key=lambda x: x['count'], reverse=True)
     def _analyze_optimization_opportunities(self, result: AnalysisResult, data_types: list, dfg: dict) -> dict:
+        """Analyze optimization opportunities in data handling."""
         opt = {'potential_score': 0.0, 'type_consolidation': [], 'process_consolidation': [], 'hub_optimization': [], 'recommendations': []}
         similar = {}
         for dt in data_types:

code2llm-0.5.121/code2llm/analysis/pipeline_classifier.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""Pipeline Classifier — domain classification and naming for pipelines.
+Groups pipelines by module domain (NLP, Analysis, Export, Refactor, etc.)
+and derives human-readable pipeline names.
+"""
+from collections import defaultdict
+from typing import Dict, List, Optional
+from code2llm.core.models import FunctionInfo
+from code2llm.analysis.type_inference import TypeInferenceEngine
+# Module-to-domain mapping heuristics
+DOMAIN_KEYWORDS: Dict[str, List[str]] = {
+    "NLP": ["nlp", "natural", "language", "intent", "entity",
+            "query", "normalize", "tokenize", "match"],
+    "Analysis": ["analysis", "analyzer", "analyse", "analyze",
+                 "metric", "complexity", "cfg", "dfg", "call_graph"],
+    "Export": ["export", "exporter", "render", "format", "output",
+               "toon", "mermaid", "json_export", "yaml_export"],
+    "Refactor": ["refactor", "smell", "suggest", "fix", "patch",
+                 "template", "prompt", "engine"],
+    "Core": ["core", "config", "model", "base", "util", "helper"],
+    "IO": ["io", "file", "path", "read", "write", "load", "save",
+           "cache", "storage"],
+}
+class PipelineClassifier:
+    """Classify pipelines by domain and derive human-readable names."""
+    def __init__(self, type_engine: Optional[TypeInferenceEngine] = None):
+        self._type_engine = type_engine or TypeInferenceEngine()
+    def classify_domain(
+        self, path: List[str], funcs: Dict[str, FunctionInfo]
+    ) -> str:
+        """Classify pipeline domain by analyzing module names and function names."""
+        scores: Dict[str, int] = defaultdict(int)
+        for qname in path:
+            fi = funcs.get(qname)
+            if not fi:
+                continue
+            text = f"{fi.module} {fi.name}".lower()
+            for domain, keywords in DOMAIN_KEYWORDS.items():
+                for kw in keywords:
+                    if kw in text:
+                        scores[domain] += 1
+        if scores:
+            return max(scores, key=scores.get)
+        return "Unknown"
+    def derive_pipeline_name(
+        self,
+        path: List[str],
+        funcs: Dict[str, FunctionInfo],
+        domain: str,
+    ) -> str:
+        """Derive a human-readable pipeline name."""
+        # Use the dominant sub-module name
+        module_counts: Dict[str, int] = defaultdict(int)
+        for qname in path:
+            fi = funcs.get(qname)
+            if fi:
+                parts = fi.module.split(".")
+                # Use most specific module component
+                for part in parts:
+                    if part and part not in ("code2llm", "__init__"):
+                        module_counts[part] += 1
+        if module_counts:
+            dominant = max(module_counts, key=module_counts.get)
+            # Capitalize and use domain if module name is generic
+            if dominant in ("core", "base", "utils", "helpers"):
+                return domain
+            return dominant.capitalize()
+        return domain
+    def get_entry_type(self, fi: Optional[FunctionInfo]) -> str:
+        """Get the input type of a pipeline's entry point."""
+        if not fi:
+            return "?"
+        args = self._type_engine.get_arg_types(fi)
+        for arg in args:
+            if arg["name"] == "self":
+                continue
+            if arg.get("type"):
+                return arg["type"]
+            return arg["name"]
+        return "?"
+    def get_exit_type(self, fi: Optional[FunctionInfo]) -> str:
+        """Get the output type of a pipeline's exit point."""
+        if not fi:
+            return "?"
+        ret = self._type_engine.get_return_type(fi)
+        return ret if ret else "?"

{code2llm-0.5.119 → code2llm-0.5.121}/code2llm/analysis/pipeline_detector.py RENAMED Viewed

@@ -8,18 +8,20 @@ Uses call graph analysis with networkx to:
 - Aggregate purity per pipeline using SideEffectDetector
 Sprint 3 (v0.3.2): Replaces the custom DFS chain-tracing in FlowExporter.
+Refactored v0.5.x: Extracted resolver and classifier into separate modules.
 """
 import logging
-from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Set
 import networkx as nx
-from code2llm.core.models import AnalysisResult, FunctionInfo
+from code2llm.core.models import FunctionInfo
 from .side_effects import SideEffectDetector, SideEffectInfo
 from .type_inference import TypeInferenceEngine
+from .pipeline_resolver import PipelineResolver
+from .pipeline_classifier import PipelineClassifier, DOMAIN_KEYWORDS
 logger = logging.getLogger(__name__)
@@ -28,28 +30,6 @@ MIN_PIPELINE_LENGTH = 3
 MAX_PIPELINES = 12
 CC_HIGH = 15
-# Patterns to exclude from analysis
-EXCLUDE_PATTERNS = frozenset({
-    'venv', '.venv', 'env', '.env', 'publish-env', 'test-env',
-    'site-packages', 'node_modules', '__pycache__', '.git',
-    'dist', 'build', 'egg-info', '.tox', '.mypy_cache',
-})
-# Module-to-domain mapping heuristics
-DOMAIN_KEYWORDS: Dict[str, List[str]] = {
-    "NLP": ["nlp", "natural", "language", "intent", "entity",
-            "query", "normalize", "tokenize", "match"],
-    "Analysis": ["analysis", "analyzer", "analyse", "analyze",
-                 "metric", "complexity", "cfg", "dfg", "call_graph"],
-    "Export": ["export", "exporter", "render", "format", "output",
-               "toon", "mermaid", "json_export", "yaml_export"],
-    "Refactor": ["refactor", "smell", "suggest", "fix", "patch",
-                 "template", "prompt", "engine"],
-    "Core": ["core", "config", "model", "base", "util", "helper"],
-    "IO": ["io", "file", "path", "read", "write", "load", "save",
-           "cache", "storage"],
-}
 @dataclass
 class PipelineStage:
@@ -112,11 +92,28 @@ class Pipeline:
         }
+# Re-export for backward compatibility
+__all__ = [
+    'PipelineDetector',
+    'Pipeline',
+    'PipelineStage',
+    'PipelineResolver',
+    'PipelineClassifier',
+    'DOMAIN_KEYWORDS',
+    'MIN_PIPELINE_LENGTH',
+    'MAX_PIPELINES',
+    'CC_HIGH',
+]
 class PipelineDetector:
     """Detect pipelines in a codebase using networkx graph analysis.
     Builds a call graph as a DiGraph, finds longest paths as pipeline
     candidates, groups by module domain, and labels entry/exit points.
+    Refactored to delegate resolution and classification to specialized
+    helper classes: PipelineResolver and PipelineClassifier.
     """
     def __init__(
@@ -126,6 +123,8 @@ class PipelineDetector:
     ):
         self._type_engine = type_engine or TypeInferenceEngine()
         self._se_detector = side_effect_detector or SideEffectDetector()
+        self._resolver = PipelineResolver()
+        self._classifier = PipelineClassifier(self._type_engine)
     def detect(
         self,
@@ -177,7 +176,7 @@ class PipelineDetector:
         for qname, fi in funcs.items():
             for callee in fi.calls:
-                resolved = self._resolve_callee(callee, funcs, caller=fi)
+                resolved = self._resolver.resolve(callee, funcs, caller=fi)
                 if resolved and resolved != qname:  # no self-loops
                     G.add_edge(qname, resolved)
@@ -303,8 +302,8 @@ class PipelineDetector:
             if not stages:
                 continue
-            domain = self._classify_domain(path, funcs)
-            name = self._derive_pipeline_name(path, funcs, domain)
+            domain = self._classifier.classify_domain(path, funcs)
+            name = self._classifier.derive_pipeline_name(path, funcs, domain)
             # Entry/exit labeling
             stages[0].is_entry = True
@@ -315,8 +314,8 @@ class PipelineDetector:
             bottleneck = max(stages, key=lambda s: s.cc) if stages else None
             # Entry/exit types
-            entry_type = self._get_entry_type(funcs.get(path[0]))
-            exit_type = self._get_exit_type(funcs.get(path[-1]))
+            entry_type = self._classifier.get_entry_type(funcs.get(path[0]))
+            exit_type = self._classifier.get_exit_type(funcs.get(path[-1]))
             pipeline = Pipeline(
                 name=name,
@@ -361,146 +360,3 @@ class PipelineDetector:
                 side_effect_summary=se_summary,
             ))
         return stages
-    # ------------------------------------------------------------------
-    # domain classification
-    # ------------------------------------------------------------------
-    def _classify_domain(
-        self, path: List[str], funcs: Dict[str, FunctionInfo]
-    ) -> str:
-        """Classify pipeline domain by analyzing module names and function names."""
-        scores: Dict[str, int] = defaultdict(int)
-        for qname in path:
-            fi = funcs.get(qname)
-            if not fi:
-                continue
-            text = f"{fi.module} {fi.name}".lower()
-            for domain, keywords in DOMAIN_KEYWORDS.items():
-                for kw in keywords:
-                    if kw in text:
-                        scores[domain] += 1
-        if scores:
-            return max(scores, key=scores.get)
-        return "Unknown"
-    def _derive_pipeline_name(
-        self, path: List[str],
-        funcs: Dict[str, FunctionInfo],
-        domain: str,
-    ) -> str:
-        """Derive a human-readable pipeline name."""
-        # Use the dominant sub-module name
-        module_counts: Dict[str, int] = defaultdict(int)
-        for qname in path:
-            fi = funcs.get(qname)
-            if fi:
-                parts = fi.module.split(".")
-                # Use most specific module component
-                for part in parts:
-                    if part and part not in ("code2llm", "__init__"):
-                        module_counts[part] += 1
-        if module_counts:
-            dominant = max(module_counts, key=module_counts.get)
-            # Capitalize and use domain if module name is generic
-            if dominant in ("core", "base", "utils", "helpers"):
-                return domain
-            return dominant.capitalize()
-        return domain
-    # ------------------------------------------------------------------
-    # type helpers
-    # ------------------------------------------------------------------
-    def _get_entry_type(self, fi: Optional[FunctionInfo]) -> str:
-        """Get the input type of a pipeline's entry point."""
-        if not fi:
-            return "?"
-        args = self._type_engine.get_arg_types(fi)
-        for arg in args:
-            if arg["name"] == "self":
-                continue
-            if arg.get("type"):
-                return arg["type"]
-            return arg["name"]
-        return "?"
-    def _get_exit_type(self, fi: Optional[FunctionInfo]) -> str:
-        """Get the output type of a pipeline's exit point."""
-        if not fi:
-            return "?"
-        ret = self._type_engine.get_return_type(fi)
-        return ret if ret else "?"
-    # ------------------------------------------------------------------
-    # callee resolution
-    # ------------------------------------------------------------------
-    def _resolve_callee(
-        self, callee: str, funcs: Dict[str, FunctionInfo],
-        caller: Optional[FunctionInfo] = None,
-    ) -> Optional[str]:
-        """Resolve callee name to qualified name.
-        Handles:
-        - Direct qualified matches
-        - self.method → same-class method resolution
-        - Unqualified names with same-class preference
-        Returns None for ambiguous matches (multiple candidates)
-        to avoid creating phantom pipeline edges.
-        """
-        # Direct match
-        if callee in funcs:
-            return callee
-        bare, is_self_call = self._strip_self_prefix(callee)
-        # Try same-class resolution first
-        if result := self._try_same_class_resolution(bare, caller, funcs):
-            return result
-        # Suffix match
-        candidates = self._get_suffix_candidates(bare, funcs)
-        if len(candidates) == 1:
-            return candidates[0]
-        # Prefer same-class candidates for method calls
-        return self._select_same_class_candidate(candidates, caller, is_self_call)
-    def _strip_self_prefix(self, callee: str) -> Tuple[str, bool]:
-        """Strip self. prefix and return bare name + flag."""
-        if callee.startswith("self."):
-            return callee[5:], True
-        return callee, False
-    def _try_same_class_resolution(
-        self, bare: str, caller: Optional[FunctionInfo], funcs: Dict[str, FunctionInfo]
-    ) -> Optional[str]:
-        """Try to resolve method in the same class as caller."""
-        if caller and caller.class_name:
-            class_prefix = f"{caller.module}.{caller.class_name}."
-            class_candidate = class_prefix + bare
-            if class_candidate in funcs:
-                return class_candidate
-        return None
-    def _get_suffix_candidates(self, bare: str, funcs: Dict[str, FunctionInfo]) -> List[str]:
-        """Find candidates matching by suffix."""
-        return [qn for qn in funcs if qn.endswith(f".{bare}")]
-    def _select_same_class_candidate(
-        self, candidates: List[str], caller: Optional[FunctionInfo], is_self_call: bool
-    ) -> Optional[str]:
-        """Select candidate from same class if applicable."""
-        if not candidates or not (is_self_call or (caller and caller.class_name)):
-            return None
-        same_class = [
-            qn for qn in candidates
-            if caller and caller.class_name and f".{caller.class_name}." in qn
-        ]
-        if len(same_class) == 1:
-            return same_class[0]
-        return None

code2llm-0.5.121/code2llm/analysis/pipeline_resolver.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""Pipeline Resolver — callee resolution for pipeline detection.
+Handles resolution of function calls to qualified names,
+including self.method resolution within the same class.
+"""
+from typing import Dict, List, Optional, Tuple
+from code2llm.core.models import FunctionInfo
+class PipelineResolver:
+    """Resolves callee names to qualified function names."""
+    def resolve(
+        self,
+        callee: str,
+        funcs: Dict[str, FunctionInfo],
+        caller: Optional[FunctionInfo] = None,
+    ) -> Optional[str]:
+        """Resolve callee name to qualified name.
+        Handles:
+        - Direct qualified matches
+        - self.method → same-class method resolution
+        - Unqualified names with same-class preference
+        Returns None for ambiguous matches (multiple candidates)
+        to avoid creating phantom pipeline edges.
+        """
+        # Direct match
+        if callee in funcs:
+            return callee
+        bare, is_self_call = self._strip_self_prefix(callee)
+        # Try same-class resolution first
+        if result := self._try_same_class_resolution(bare, caller, funcs):
+            return result
+        # Suffix match
+        candidates = self._get_suffix_candidates(bare, funcs)
+        if len(candidates) == 1:
+            return candidates[0]
+        # Prefer same-class candidates for method calls
+        return self._select_same_class_candidate(candidates, caller, is_self_call)
+    def _strip_self_prefix(self, callee: str) -> Tuple[str, bool]:
+        """Strip self. prefix and return bare name + flag."""
+        if callee.startswith("self."):
+            return callee[5:], True
+        return callee, False
+    def _try_same_class_resolution(
+        self,
+        bare: str,
+        caller: Optional[FunctionInfo],
+        funcs: Dict[str, FunctionInfo],
+    ) -> Optional[str]:
+        """Try to resolve method in the same class as caller."""
+        if caller and caller.class_name:
+            class_prefix = f"{caller.module}.{caller.class_name}."
+            class_candidate = class_prefix + bare
+            if class_candidate in funcs:
+                return class_candidate
+        return None
+    def _get_suffix_candidates(
+        self, bare: str, funcs: Dict[str, FunctionInfo]
+    ) -> List[str]:
+        """Find candidates matching by suffix."""
+        return [qn for qn in funcs if qn.endswith(f".{bare}")]
+    def _select_same_class_candidate(
+        self,
+        candidates: List[str],
+        caller: Optional[FunctionInfo],
+        is_self_call: bool,
+    ) -> Optional[str]:
+        """Select candidate from same class if applicable."""
+        if not candidates or not (is_self_call or (caller and caller.class_name)):
+            return None
+        same_class = [
+            qn for qn in candidates
+            if caller and caller.class_name and f".{caller.class_name}." in qn
+        ]
+        if len(same_class) == 1:
+            return same_class[0]
+        return None

code2llm 0.5.119__tar.gz → 0.5.121__tar.gz

code2llm 0.5.119.tar.gz → 0.5.121.tar.gz