code2logic 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
code2logic/intent.py ADDED
@@ -0,0 +1,246 @@
1
+ """
2
+ Enhanced Intent Generator with NLP support.
3
+
4
+ Uses lemmatization, pattern matching, and docstring extraction
5
+ to generate human-readable intent descriptions for functions.
6
+ """
7
+
8
+ import re
9
+ from typing import Optional, List, Tuple
10
+
11
+ # Optional NLP imports with graceful degradation
12
+ try:
13
+ import nltk
14
+ from nltk.stem import WordNetLemmatizer
15
+ NLTK_AVAILABLE = True
16
+ except ImportError:
17
+ NLTK_AVAILABLE = False
18
+
19
+ try:
20
+ import spacy
21
+ SPACY_AVAILABLE = True
22
+ except ImportError:
23
+ SPACY_AVAILABLE = False
24
+
25
+
26
class EnhancedIntentGenerator:
    """
    Intent generator with NLP support: lemmatization, pattern matching,
    and docstring extraction.

    Supports both English and Polish intent generation.
    Falls back gracefully if NLP libraries are not available.

    Example:
        >>> gen = EnhancedIntentGenerator(lang='en')
        >>> gen.generate("getUserById")
        'retrieves user by id'
        >>> gen.generate("validateEmail")
        'validates email'
    """

    # Extended verb patterns mapping leading name words to (Polish, English)
    # intent verbs. Iterated in insertion order, so a word must appear in
    # exactly one tuple — earlier groups would shadow later ones.
    VERB_PATTERNS: dict[tuple[str, ...], tuple[str, str]] = {
        # CRUD operations
        ('get', 'fetch', 'retrieve', 'load', 'find', 'query', 'read', 'select'):
            ('pobiera', 'retrieves'),
        ('set', 'update', 'modify', 'change', 'edit', 'put', 'patch'):
            ('aktualizuje', 'updates'),
        # NOTE: 'init' removed here — it belongs to the Lifecycle group below;
        # having it in both made the "initializes" mapping unreachable.
        ('create', 'make', 'build', 'generate', 'new', 'add', 'insert', 'post'):
            ('tworzy', 'creates'),
        ('delete', 'remove', 'clear', 'destroy', 'drop', 'erase'):
            ('usuwa', 'deletes'),

        # Validation
        ('is', 'has', 'can', 'should', 'check', 'test', 'assert'):
            ('sprawdza', 'checks'),
        ('validate', 'verify', 'confirm', 'authenticate'):
            ('waliduje', 'validates'),

        # Transformation
        ('convert', 'transform', 'map', 'translate', 'cast', 'to'):
            ('konwertuje', 'converts'),
        ('parse', 'extract', 'decode', 'deserialize'):
            ('parsuje', 'parses'),
        ('format', 'render', 'serialize', 'encode', 'stringify'):
            ('formatuje', 'formats'),

        # Communication
        ('send', 'emit', 'dispatch', 'publish', 'notify', 'push'):
            ('wysyła', 'sends'),
        ('receive', 'listen', 'subscribe', 'on', 'handle'):
            ('obsługuje', 'handles'),

        # Lifecycle
        ('init', 'initialize', 'setup', 'configure', 'bootstrap'):
            ('inicjalizuje', 'initializes'),
        ('start', 'run', 'execute', 'launch', 'begin', 'open'):
            ('uruchamia', 'starts'),
        ('stop', 'end', 'finish', 'close', 'shutdown', 'terminate'):
            ('kończy', 'stops'),

        # Data operations
        ('process', 'compute', 'calculate', 'evaluate', 'analyze'):
            ('przetwarza', 'processes'),
        ('filter', 'search', 'match', 'lookup'):
            ('filtruje', 'filters'),
        ('sort', 'order', 'arrange', 'rank'):
            ('sortuje', 'sorts'),
        ('merge', 'combine', 'join', 'concat'):
            ('łączy', 'merges'),
        ('split', 'divide', 'separate', 'partition'):
            ('dzieli', 'splits'),

        # Logging
        ('log', 'print', 'write', 'output', 'display'):
            ('loguje', 'logs'),

        # Registration
        ('register', 'bind', 'attach', 'connect', 'hook'):
            ('rejestruje', 'registers'),

        # Caching
        ('cache', 'memoize', 'store', 'save', 'persist'):
            ('cachuje', 'caches'),
    }

    def __init__(self, lang: str = 'en'):
        """
        Initialize the intent generator.

        Args:
            lang: Language for intent output ('en' or 'pl')
        """
        self.lang = lang
        self.lemmatizer = None  # NLTK WordNetLemmatizer, if available
        self.nlp = None         # spaCy pipeline, if available

        # Initialize NLTK lemmatizer if available; download the wordnet
        # corpus on first use, swallowing failures (offline environments).
        if NLTK_AVAILABLE:
            try:
                nltk.data.find('corpora/wordnet')
                self.lemmatizer = WordNetLemmatizer()
            except LookupError:
                try:
                    nltk.download('wordnet', quiet=True)
                    self.lemmatizer = WordNetLemmatizer()
                except Exception:
                    pass

        # Initialize spaCy if available (for more advanced NLP).
        # Falls back to the English model when the requested one is missing.
        if SPACY_AVAILABLE:
            try:
                model = 'pl_core_news_sm' if lang == 'pl' else 'en_core_web_sm'
                self.nlp = spacy.load(model)
            except OSError:
                try:
                    self.nlp = spacy.load('en_core_web_sm')
                except OSError:
                    pass

    def generate(self, name: str, docstring: Optional[str] = None) -> str:
        """
        Generate intent from function name and optional docstring.

        A usable docstring (first line of 10+ characters) takes precedence
        over name-based pattern matching.

        Args:
            name: Function or method name
            docstring: Optional docstring to extract intent from

        Returns:
            Human-readable intent description (at most 80 characters when
            extracted from a docstring)

        Example:
            >>> gen = EnhancedIntentGenerator()
            >>> gen.generate("calculateTotalPrice")
            'processes total price'
        """
        # Try docstring first
        if docstring:
            intent = self._extract_from_docstring(docstring)
            if intent and len(intent) >= 10:
                return intent[:80]

        # Parse function name
        words = self._split_name(name)
        if not words:
            return name

        first_word = words[0].lower()
        rest = ' '.join(words[1:]).lower() if len(words) > 1 else ''

        # Lemmatize as a verb so e.g. "fetches" matches the "fetch" pattern
        if self.lemmatizer:
            try:
                first_word = self.lemmatizer.lemmatize(first_word, pos='v')
            except Exception:
                pass

        # Match against verb patterns
        intent_idx = 0 if self.lang == 'pl' else 1
        for verbs, intents in self.VERB_PATTERNS.items():
            if first_word in verbs:
                intent = intents[intent_idx]
                return f"{intent} {rest}" if rest else intent

        # Fallback - join words
        return ' '.join(words).lower()

    def _extract_from_docstring(self, docstring: str) -> Optional[str]:
        """Extract intent from docstring's first line.

        Strips a leading filler word ("Returns", "The", ...) and truncates
        the result to 80 characters. Returns None for empty input.
        """
        if not docstring:
            return None

        first_line = docstring.split('\n')[0].strip()

        # Remove common prefixes (first match only)
        prefixes = [
            'Returns', 'Return', 'Gets', 'Get', 'Sets', 'Set',
            'Creates', 'Create', 'Deletes', 'Delete',
            'The', 'A', 'An'
        ]
        for prefix in prefixes:
            if first_line.startswith(prefix + ' '):
                first_line = first_line[len(prefix)+1:]
                break

        return first_line[:80] if first_line else None

    def _split_name(self, name: str) -> List[str]:
        """
        Split function name into words.

        Handles:
        - camelCase
        - PascalCase
        - snake_case
        - kebab-case
        - ACRONYMS (e.g., XMLParser -> XML Parser)
        """
        # Remove private prefixes (Python `_` and JS `#` conventions)
        name = name.lstrip('_#')

        # Handle kebab-case by normalizing to snake_case
        name = name.replace('-', '_')

        # snake_case
        if '_' in name:
            return [w for w in name.split('_') if w]

        # camelCase/PascalCase with acronym support
        # XMLParser -> XML Parser, parseXML -> parse XML
        words = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', name)
        words = re.sub(r'([a-z\d])([A-Z])', r'\1 \2', words)

        return [w.strip() for w in words.split() if w.strip()]

    @classmethod
    def get_available_features(cls) -> dict[str, bool]:
        """
        Get dictionary of available NLP features.

        Returns:
            Dict with feature names and availability status
        """
        return {
            'nltk_lemmatizer': NLTK_AVAILABLE,
            'spacy': SPACY_AVAILABLE,
        }
code2logic/llm.py ADDED
@@ -0,0 +1,449 @@
1
+ """
2
+ LLM Integration for Code2Logic
3
+
4
+ Provides integration with local Ollama and LiteLLM for:
5
+ - Code generation from CSV analysis
6
+ - Refactoring suggestions
7
+ - Duplicate detection with semantic analysis
8
+ - Code translation between languages
9
+
10
+ Usage:
11
+ from code2logic.llm import CodeAnalyzer
12
+
13
+ analyzer = CodeAnalyzer(model="qwen2.5-coder:7b")
14
+ suggestions = analyzer.suggest_refactoring(project_info)
15
+ """
16
+
17
+ import json
18
+ from typing import Optional, List, Dict, Any
19
+ from dataclasses import dataclass
20
+
21
+ # Optional imports
22
+ try:
23
+ import httpx
24
+ HTTPX_AVAILABLE = True
25
+ except ImportError:
26
+ HTTPX_AVAILABLE = False
27
+
28
+ try:
29
+ from litellm import completion
30
+ LITELLM_AVAILABLE = True
31
+ except ImportError:
32
+ LITELLM_AVAILABLE = False
33
+
34
+
35
@dataclass
class LLMConfig:
    """Connection and sampling configuration for an LLM backend."""

    # Backend selection: "ollama" talks to a local Ollama server directly,
    # "litellm" routes through the LiteLLM unified API.
    provider: str = "ollama"
    # Model identifier understood by the chosen backend.
    model: str = "qwen2.5-coder:7b"
    # Base URL of the backend API.
    base_url: str = "http://localhost:11434"
    # API key, if the backend requires one.
    api_key: Optional[str] = None
    # Request timeout in seconds.
    timeout: int = 120
    # Sampling temperature for generation.
    temperature: float = 0.7
    # Maximum number of tokens to generate per request.
    max_tokens: int = 2000
45
+
46
+
47
class OllamaClient:
    """Direct client for the Ollama HTTP API."""

    def __init__(self, config: LLMConfig):
        # Fail fast when the optional HTTP dependency is missing.
        if not HTTPX_AVAILABLE:
            raise ImportError("httpx required: pip install httpx")
        self.config = config
        self.client = httpx.Client(timeout=config.timeout)

    def _sampling_options(self) -> Dict[str, Any]:
        """Sampling options shared by the generate and chat payloads."""
        return {
            "temperature": self.config.temperature,
            "num_predict": self.config.max_tokens,
        }

    def _post_json(self, endpoint: str, payload: Dict[str, Any]) -> Any:
        """POST *payload* to *endpoint* and return the decoded JSON body."""
        resp = self.client.post(f"{self.config.base_url}{endpoint}", json=payload)
        resp.raise_for_status()
        return resp.json()

    def generate(self, prompt: str, system: Optional[str] = None) -> str:
        """Generate completion from Ollama."""
        body: Dict[str, Any] = {
            "model": self.config.model,
            "prompt": prompt,
            "stream": False,
            "options": self._sampling_options(),
        }
        if system:
            body["system"] = system
        return self._post_json("/api/generate", body).get("response", "")

    def chat(self, messages: List[Dict[str, str]]) -> str:
        """Chat completion from Ollama."""
        body: Dict[str, Any] = {
            "model": self.config.model,
            "messages": messages,
            "stream": False,
            "options": self._sampling_options(),
        }
        data = self._post_json("/api/chat", body)
        return data.get("message", {}).get("content", "")

    def is_available(self) -> bool:
        """Check if Ollama is running."""
        try:
            probe = self.client.get(f"{self.config.base_url}/api/tags")
            return probe.status_code == 200
        except Exception:
            return False

    def list_models(self) -> List[str]:
        """List available models."""
        try:
            listing = self.client.get(f"{self.config.base_url}/api/tags").json()
            return [entry["name"] for entry in listing.get("models", [])]
        except Exception:
            return []
113
+
114
+
115
class LiteLLMClient:
    """Client that routes requests through the LiteLLM unified API."""

    def __init__(self, config: LLMConfig):
        # Fail fast when the optional dependency is missing.
        if not LITELLM_AVAILABLE:
            raise ImportError("litellm required: pip install litellm")
        self.config = config

    def generate(self, prompt: str, system: Optional[str] = None) -> str:
        """Generate completion via LiteLLM."""
        conversation: List[Dict[str, str]] = []
        if system:
            conversation.append({"role": "system", "content": system})
        conversation.append({"role": "user", "content": prompt})
        return self.chat(conversation)

    def chat(self, messages: List[Dict[str, str]]) -> str:
        """Chat completion via LiteLLM."""
        # Non-litellm providers are addressed through the ollama/ prefix.
        if self.config.provider == "litellm":
            model = self.config.model
        else:
            model = f"ollama/{self.config.model}"

        response = completion(
            model=model,
            messages=messages,
            api_base=self.config.base_url,
            temperature=self.config.temperature,
            max_tokens=self.config.max_tokens,
        )
        return response.choices[0].message.content

    def is_available(self) -> bool:
        """Check if LiteLLM backend is available by issuing a probe request."""
        try:
            self.chat([{"role": "user", "content": "test"}])
        except Exception:
            return False
        return True
154
+
155
+
156
class CodeAnalyzer:
    """
    LLM-powered code analysis for Code2Logic.

    Example:
        >>> from code2logic import analyze_project
        >>> from code2logic.llm import CodeAnalyzer
        >>>
        >>> project = analyze_project("/path/to/project")
        >>> analyzer = CodeAnalyzer()
        >>>
        >>> # Get refactoring suggestions
        >>> suggestions = analyzer.suggest_refactoring(project)
        >>>
        >>> # Generate code in another language
        >>> code = analyzer.generate_code(project, target_lang="typescript")
    """

    SYSTEM_PROMPT = """You are an expert software architect and code analyst.
You analyze code structure and provide actionable suggestions for:
- Refactoring and code improvement
- Duplicate detection and consolidation
- Code generation and translation
- Architecture optimization

Be specific, practical, and provide code examples when helpful."""

    def __init__(
        self,
        model: str = "qwen2.5-coder:7b",
        provider: str = "ollama",
        base_url: str = "http://localhost:11434",
        **kwargs
    ):
        """
        Initialize CodeAnalyzer.

        Args:
            model: Model name (e.g., "qwen2.5-coder:7b")
            provider: "ollama" or "litellm"
            base_url: API base URL
            **kwargs: Extra LLMConfig fields (timeout, temperature, ...)
        """
        self.config = LLMConfig(
            provider=provider,
            model=model,
            base_url=base_url,
            **kwargs
        )

        # "ollama" talks to the local server directly; anything else is
        # routed through the LiteLLM client.
        if provider == "ollama":
            self.client = OllamaClient(self.config)
        else:
            self.client = LiteLLMClient(self.config)

    def is_available(self) -> bool:
        """Check if LLM backend is available."""
        return self.client.is_available()

    @staticmethod
    def _parse_json_array(response: str) -> Optional[List[Dict[str, Any]]]:
        """Extract and parse the first-to-last JSON array span in *response*.

        LLM replies often wrap JSON in prose; this takes the substring from
        the first '[' to the last ']' and attempts to parse it.

        Returns:
            The parsed array, or None when no parsable array is present.
        """
        start = response.find('[')
        end = response.rfind(']') + 1
        if start >= 0 and end > start:
            try:
                return json.loads(response[start:end])
            except json.JSONDecodeError:
                return None
        return None

    def suggest_refactoring(self, project) -> List[Dict[str, Any]]:
        """
        Analyze project and suggest refactoring improvements.

        Args:
            project: ProjectInfo from code2logic analysis

        Returns:
            List of refactoring suggestions with details; when the LLM reply
            cannot be parsed as JSON, a single-element list wrapping the raw
            response is returned.
        """
        from .generators import CSVGenerator

        # Generate compact representation
        csv_gen = CSVGenerator()
        csv_data = csv_gen.generate(project, detail='full')

        # Truncate if too long to keep the prompt within context limits
        if len(csv_data) > 8000:
            lines = csv_data.split('\n')
            csv_data = '\n'.join(lines[:100]) + f"\n... ({len(lines)-100} more lines)"

        prompt = f"""Analyze this codebase and suggest refactoring improvements:

```csv
{csv_data}
```

For each suggestion, provide:
1. Issue type (complexity, duplication, naming, structure)
2. Specific location (path, function name)
3. Problem description
4. Recommended fix with code example if applicable
5. Priority (high/medium/low)

Format as JSON array."""

        response = self.client.generate(prompt, system=self.SYSTEM_PROMPT)

        parsed = self._parse_json_array(response)
        if parsed is not None:
            return parsed

        # Return raw response if JSON parsing fails
        return [{"raw_response": response}]

    def find_semantic_duplicates(self, project) -> List[Dict[str, Any]]:
        """
        Find semantically similar functions using LLM.

        Args:
            project: ProjectInfo from code2logic analysis

        Returns:
            List of duplicate groups with similarity analysis; when the LLM
            reply cannot be parsed as JSON, a single-element list wrapping
            the raw response is returned.
        """
        # Collect all functions and methods with their intents
        functions = []
        for m in project.modules:
            for f in m.functions:
                functions.append({
                    'path': m.path,
                    'name': f.name,
                    'signature': self._build_signature(f),
                    'intent': f.intent or '',
                })
            for c in m.classes:
                for method in c.methods:
                    functions.append({
                        'path': m.path,
                        'name': f"{c.name}.{method.name}",
                        'signature': self._build_signature(method),
                        'intent': method.intent or '',
                    })

        # Cap the listing to keep the prompt small
        if len(functions) > 50:
            functions = functions[:50]

        prompt = f"""Analyze these functions and find semantic duplicates:

{json.dumps(functions, indent=2)}

Group functions that:
1. Do the same thing (even with different names)
2. Have similar logic patterns
3. Could be consolidated into shared utilities

For each group, explain:
- Why they are duplicates
- How to consolidate them
- Suggested shared function name

Format as JSON array of groups."""

        response = self.client.generate(prompt, system=self.SYSTEM_PROMPT)

        parsed = self._parse_json_array(response)
        if parsed is not None:
            return parsed

        return [{"raw_response": response}]

    def generate_code(
        self,
        project,
        target_lang: str,
        module_filter: Optional[str] = None
    ) -> Dict[str, str]:
        """
        Generate code in target language from project analysis.

        Args:
            project: ProjectInfo from code2logic analysis
            target_lang: Target language (typescript, python, go, rust, etc.)
            module_filter: Optional filter for specific module paths

        Returns:
            Dict mapping original path to generated code
        """
        results = {}

        modules = project.modules
        if module_filter:
            modules = [m for m in modules if module_filter in m.path]

        for module in modules[:5]:  # Limit to 5 modules
            # Build a compact specification of the module's public surface
            spec_lines = [f"Module: {module.path}"]
            spec_lines.append(f"Language: {module.language}")
            spec_lines.append(f"Lines: {module.lines_code}")

            if module.imports:
                spec_lines.append(f"Imports: {', '.join(module.imports[:10])}")

            if module.classes:
                spec_lines.append("\nClasses:")
                for c in module.classes[:5]:
                    spec_lines.append(f"  class {c.name}({', '.join(c.bases)})")
                    for m in c.methods[:10]:
                        spec_lines.append(f"    - {m.name}{self._build_signature(m)}: {m.intent}")

            if module.functions:
                spec_lines.append("\nFunctions:")
                for f in module.functions[:10]:
                    spec_lines.append(f"  - {f.name}{self._build_signature(f)}: {f.intent}")

            spec = '\n'.join(spec_lines)

            prompt = f"""Generate {target_lang} code from this specification:

{spec}

Requirements:
1. Idiomatic {target_lang} code
2. Full type annotations
3. Docstrings/comments
4. Error handling
5. Maintain the same public API

Output only the code."""

            response = self.client.generate(prompt, system=self.SYSTEM_PROMPT)
            results[module.path] = response

        return results

    def translate_function(
        self,
        name: str,
        signature: str,
        intent: str,
        source_lang: str,
        target_lang: str
    ) -> str:
        """
        Translate a single function to another language.

        Args:
            name: Function name
            signature: Function signature
            intent: What the function does
            source_lang: Source language
            target_lang: Target language

        Returns:
            Generated code in target language
        """
        prompt = f"""Translate this {source_lang} function to {target_lang}:

Function: {name}
Signature: {signature}
Purpose: {intent}

Generate idiomatic {target_lang} code with:
1. Proper type annotations
2. Error handling
3. Documentation

Output only the code."""

        return self.client.generate(prompt, system=self.SYSTEM_PROMPT)

    def _build_signature(self, f) -> str:
        """Build compact signature string like ``(a,b)->int``.

        Shows at most four parameter names (then '...') and the return type
        when one is recorded.
        """
        params = ','.join(f.params[:4])
        if len(f.params) > 4:
            params += '...'
        ret = f"->{f.return_type}" if f.return_type else ""
        return f"({params}){ret}"
432
+
433
+
434
def get_available_backends() -> Dict[str, bool]:
    """Get availability status of LLM backends."""
    availability = {
        'httpx': HTTPX_AVAILABLE,
        'litellm': LITELLM_AVAILABLE,
        'ollama': False,
    }

    # Only probe the Ollama server when the HTTP client exists;
    # any connection failure simply leaves the flag False.
    if HTTPX_AVAILABLE:
        try:
            availability['ollama'] = OllamaClient(LLMConfig()).is_available()
        except Exception:
            pass

    return availability