PyPI - code2logic - Versions diffs - 1.0.0__py3-none-any.whl - Mend

code2logic 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

code2logic/__init__.py +88 -0
code2logic/analyzer.py +286 -0
code2logic/cli.py +222 -0
code2logic/dependency.py +246 -0
code2logic/generators.py +1017 -0
code2logic/gherkin.py +980 -0
code2logic/intent.py +246 -0
code2logic/llm.py +449 -0
code2logic/mcp_server.py +354 -0
code2logic/models.py +170 -0
code2logic/parsers.py +908 -0
code2logic/py.typed +2 -0
code2logic/similarity.py +165 -0
code2logic-1.0.0.dist-info/METADATA +322 -0
code2logic-1.0.0.dist-info/RECORD +18 -0
code2logic-1.0.0.dist-info/WHEEL +4 -0
code2logic-1.0.0.dist-info/entry_points.txt +3 -0
code2logic-1.0.0.dist-info/licenses/LICENSE +201 -0

code2logic/__init__.py ADDED Viewed

@@ -0,0 +1,88 @@
+"""
+Code2Logic - Convert source code to logical representation for LLM analysis.
+A Python library that analyzes codebases and generates compact, LLM-friendly
+representations with semantic understanding using NLP and AST parsing.
+Features:
+- Multi-language support (Python, JavaScript, TypeScript, Java, Go, Rust, etc.)
+- Tree-sitter AST parsing for 99% accuracy
+- NetworkX dependency graph analysis with PageRank
+- Rapidfuzz similarity detection for duplicate functions
+- NLP-powered intent extraction from function names and docstrings
+Example:
+    >>> from code2logic import analyze_project, MarkdownGenerator
+    >>> project = analyze_project("/path/to/project")
+    >>> output = MarkdownGenerator().generate(project)
+    >>> print(output)
+"""
+__version__ = "1.0.0"
+__author__ = "Softreck"
+__email__ = "info@softreck.dev"
+__license__ = "MIT"
+from .analyzer import (
+    ProjectAnalyzer,
+    analyze_project,
+)
+from .models import (
+    FunctionInfo,
+    ClassInfo,
+    TypeInfo,
+    ModuleInfo,
+    DependencyNode,
+    ProjectInfo,
+)
+from .generators import (
+    MarkdownGenerator,
+    CompactGenerator,
+    JSONGenerator,
+    YAMLGenerator,
+    CSVGenerator,
+)
+from .gherkin import (
+    GherkinGenerator,
+    StepDefinitionGenerator,
+    CucumberYAMLGenerator,
+    csv_to_gherkin,
+    gherkin_to_test_data,
+)
+from .intent import EnhancedIntentGenerator
+from .parsers import TreeSitterParser, UniversalParser
+from .dependency import DependencyAnalyzer
+from .similarity import SimilarityDetector
+__all__ = [
+    # Version
+    "__version__",
+    # Main API
+    "analyze_project",
+    "ProjectAnalyzer",
+    # Models
+    "FunctionInfo",
+    "ClassInfo",
+    "TypeInfo",
+    "ModuleInfo",
+    "DependencyNode",
+    "ProjectInfo",
+    # Generators
+    "MarkdownGenerator",
+    "CompactGenerator",
+    "JSONGenerator",
+    "YAMLGenerator",
+    "CSVGenerator",
+    # Gherkin/BDD
+    "GherkinGenerator",
+    "StepDefinitionGenerator",
+    "CucumberYAMLGenerator",
+    "csv_to_gherkin",
+    "gherkin_to_test_data",
+    # Components
+    "EnhancedIntentGenerator",
+    "TreeSitterParser",
+    "UniversalParser",
+    "DependencyAnalyzer",
+    "SimilarityDetector",
+]

code2logic/analyzer.py ADDED Viewed

@@ -0,0 +1,286 @@
+"""
+Main project analyzer orchestrating all analysis components.
+Provides the high-level API for analyzing codebases.
+"""
+import sys
+from pathlib import Path
+from datetime import datetime
+from collections import defaultdict
+from typing import Optional, List, Dict
+from .models import ProjectInfo, ModuleInfo
+from .parsers import TreeSitterParser, UniversalParser, TREE_SITTER_AVAILABLE
+from .dependency import DependencyAnalyzer, NETWORKX_AVAILABLE
+from .similarity import SimilarityDetector, RAPIDFUZZ_AVAILABLE
+from .intent import NLTK_AVAILABLE, SPACY_AVAILABLE
+class ProjectAnalyzer:
+    """
+    Main class for analyzing software projects.
+    Orchestrates:
+    - File scanning and language detection
+    - AST parsing (Tree-sitter or fallback)
+    - Dependency graph building and analysis
+    - Similar function detection
+    - Entry point identification
+    Example:
+        >>> analyzer = ProjectAnalyzer("/path/to/project")
+        >>> project = analyzer.analyze()
+        >>> print(f"Found {project.total_files} files")
+    With options:
+        >>> analyzer = ProjectAnalyzer(
+        ...     "/path/to/project",
+        ...     use_treesitter=True,
+        ...     verbose=True
+        ... )
+    """
+    # Language extension mapping
+    LANGUAGE_EXTENSIONS: Dict[str, str] = {
+        '.py': 'python',
+        '.js': 'javascript',
+        '.jsx': 'javascript',
+        '.ts': 'typescript',
+        '.tsx': 'typescript',
+        '.java': 'java',
+        '.go': 'go',
+        '.rs': 'rust',
+        '.c': 'cpp',
+        '.cpp': 'cpp',
+        '.cc': 'cpp',
+        '.h': 'cpp',
+        '.hpp': 'cpp',
+        '.php': 'php',
+        '.rb': 'ruby',
+        '.kt': 'kotlin',
+        '.swift': 'swift',
+    }
+    # Directories to ignore
+    IGNORE_DIRS: set = {
+        '.git', '.svn', '.hg',
+        'node_modules', '__pycache__', '.venv', 'venv', 'env',
+        'target', 'build', 'dist', 'out', '.next',
+        '.idea', '.vscode', '.pytest_cache',
+        'vendor', 'packages', '.tox', 'coverage',
+        '.mypy_cache', '.ruff_cache', '.cache',
+    }
+    # Files to ignore
+    IGNORE_FILES: set = {
+        '.gitignore', '.dockerignore',
+        'package-lock.json', 'yarn.lock',
+        'Pipfile.lock', 'poetry.lock',
+        'Cargo.lock', 'pnpm-lock.yaml',
+    }
+    def __init__(
+        self,
+        root_path: str,
+        use_treesitter: bool = True,
+        verbose: bool = False,
+        include_private: bool = False,
+    ):
+        """
+        Initialize the project analyzer.
+        Args:
+            root_path: Path to the project root directory
+            use_treesitter: Whether to use Tree-sitter for parsing
+            verbose: Whether to print status messages
+            include_private: Whether to include private functions/classes
+        """
+        self.root_path = Path(root_path).resolve()
+        self.verbose = verbose
+        self.include_private = include_private
+        self.modules: List[ModuleInfo] = []
+        self.languages: Dict[str, int] = defaultdict(int)
+        # Initialize parsers
+        self.ts_parser = (
+            TreeSitterParser()
+            if use_treesitter and TREE_SITTER_AVAILABLE
+            else None
+        )
+        self.fallback_parser = UniversalParser()
+        # Initialize analyzers
+        self.dep_analyzer = DependencyAnalyzer()
+        self.sim_detector = SimilarityDetector()
+        if verbose:
+            self._print_status()
+    def _print_status(self):
+        """Print library availability status."""
+        parts = []
+        parts.append("TS✓" if TREE_SITTER_AVAILABLE else "TS✗")
+        parts.append("NX✓" if NETWORKX_AVAILABLE else "NX✗")
+        parts.append("RF✓" if RAPIDFUZZ_AVAILABLE else "RF✗")
+        parts.append("NLP✓" if (SPACY_AVAILABLE or NLTK_AVAILABLE) else "NLP✗")
+        print(f"Libs: {' '.join(parts)}", file=sys.stderr)
+    def analyze(self) -> ProjectInfo:
+        """
+        Analyze the project.
+        Returns:
+            ProjectInfo with complete analysis results
+        """
+        # Scan and parse files
+        self._scan_files()
+        # Build dependency graph
+        dep_graph = self.dep_analyzer.build_graph(self.modules)
+        dep_metrics = self.dep_analyzer.analyze_metrics()
+        # Detect entry points
+        entrypoints = self._detect_entrypoints()
+        # Find similar functions
+        similar = self.sim_detector.find_similar_functions(self.modules)
+        return ProjectInfo(
+            name=self.root_path.name,
+            root_path=str(self.root_path),
+            languages=dict(self.languages),
+            modules=self.modules,
+            dependency_graph=dep_graph,
+            dependency_metrics=dep_metrics,
+            entrypoints=entrypoints,
+            similar_functions=similar,
+            total_files=len(self.modules),
+            total_lines=sum(m.lines_total for m in self.modules),
+            generated_at=datetime.now().isoformat()
+        )
+    def _scan_files(self):
+        """Scan and parse all source files."""
+        for fp in self.root_path.rglob('*'):
+            if not fp.is_file():
+                continue
+            # Skip ignored directories
+            if any(d in fp.parts for d in self.IGNORE_DIRS):
+                continue
+            # Skip ignored files
+            if fp.name in self.IGNORE_FILES:
+                continue
+            # Check extension
+            ext = fp.suffix.lower()
+            if ext not in self.LANGUAGE_EXTENSIONS:
+                continue
+            language = self.LANGUAGE_EXTENSIONS[ext]
+            self.languages[language] += 1
+            # Read file
+            try:
+                content = fp.read_text(encoding='utf-8', errors='ignore')
+            except Exception:
+                continue
+            rel_path = str(fp.relative_to(self.root_path))
+            # Try Tree-sitter first, then fallback
+            module = None
+            if self.ts_parser and self.ts_parser.is_available(language):
+                module = self.ts_parser.parse(rel_path, content, language)
+            if module is None:
+                module = self.fallback_parser.parse(rel_path, content, language)
+            if module:
+                self.modules.append(module)
+    def _detect_entrypoints(self) -> List[str]:
+        """Detect project entry points."""
+        eps = []
+        # From dependency analyzer (nodes with no incoming edges)
+        if self.dep_analyzer.graph is not None:
+            eps.extend(self.dep_analyzer.get_entrypoints())
+        # Common entry point file names
+        main_files = {
+            'main.py', 'app.py', 'server.py', '__main__.py', 'run.py',
+            'main.js', 'app.js', 'server.js', 'index.js',
+            'main.ts', 'app.ts', 'server.ts', 'index.ts',
+            'main.go', 'main.rs', 'Main.java',
+        }
+        for m in self.modules:
+            fn = Path(m.path).name
+            parent = str(Path(m.path).parent)
+            if fn in main_files and m.path not in eps:
+                eps.append(m.path)
+            elif fn in ('index.js', 'index.ts') and parent in ('.', 'src') and m.path not in eps:
+                eps.append(m.path)
+        return eps[:10]
+    def get_statistics(self) -> Dict:
+        """
+        Get analysis statistics.
+        Returns:
+            Dict with analysis statistics
+        """
+        return {
+            'total_files': len(self.modules),
+            'total_lines': sum(m.lines_total for m in self.modules),
+            'total_code_lines': sum(m.lines_code for m in self.modules),
+            'languages': dict(self.languages),
+            'total_classes': sum(len(m.classes) for m in self.modules),
+            'total_functions': sum(len(m.functions) for m in self.modules),
+        }
+def analyze_project(
+    path: str,
+    use_treesitter: bool = True,
+    verbose: bool = False,
+) -> ProjectInfo:
+    """
+    Convenience function to analyze a project.
+    Args:
+        path: Path to the project directory
+        use_treesitter: Whether to use Tree-sitter for parsing
+        verbose: Whether to print status messages
+    Returns:
+        ProjectInfo with analysis results
+    Example:
+        >>> from code2logic import analyze_project
+        >>> project = analyze_project("/path/to/project")
+        >>> print(f"Analyzed {project.total_files} files")
+    """
+    analyzer = ProjectAnalyzer(path, use_treesitter=use_treesitter, verbose=verbose)
+    return analyzer.analyze()
+def get_library_status() -> Dict[str, bool]:
+    """
+    Get availability status of optional libraries.
+    Returns:
+        Dict mapping library name to availability status
+    """
+    return {
+        'tree_sitter': TREE_SITTER_AVAILABLE,
+        'networkx': NETWORKX_AVAILABLE,
+        'rapidfuzz': RAPIDFUZZ_AVAILABLE,
+        'nltk': NLTK_AVAILABLE,
+        'spacy': SPACY_AVAILABLE,
+    }

code2logic/cli.py ADDED Viewed

@@ -0,0 +1,222 @@
+"""
+Command-line interface for Code2Logic.
+Usage:
+    code2logic /path/to/project
+    code2logic /path/to/project -f csv -o output.csv
+    code2logic /path/to/project -f yaml
+    code2logic /path/to/project -f json --flat
+"""
+import argparse
+import os
+import sys
+import subprocess
+from . import __version__
+def ensure_dependencies():
+    """Auto-install optional dependencies for best results."""
+    packages = {
+        'tree-sitter': 'tree_sitter',
+        'tree-sitter-python': 'tree_sitter_python',
+        'tree-sitter-javascript': 'tree_sitter_javascript',
+        'tree-sitter-typescript': 'tree_sitter_typescript',
+        'networkx': 'networkx',
+        'rapidfuzz': 'rapidfuzz',
+        'pyyaml': 'yaml',
+    }
+    missing = []
+    for pkg_name, import_name in packages.items():
+        try:
+            __import__(import_name)
+        except ImportError:
+            missing.append(pkg_name)
+    if missing:
+        print(f"Installing dependencies for best results: {', '.join(missing)}", file=sys.stderr)
+        try:
+            subprocess.check_call([
+                sys.executable, '-m', 'pip', 'install', '-q',
+                '--break-system-packages', *missing
+            ], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
+            print("Dependencies installed successfully!", file=sys.stderr)
+        except subprocess.CalledProcessError:
+            # Try without --break-system-packages
+            try:
+                subprocess.check_call([
+                    sys.executable, '-m', 'pip', 'install', '-q', *missing
+                ], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
+                print("Dependencies installed successfully!", file=sys.stderr)
+            except subprocess.CalledProcessError:
+                print(f"Warning: Could not install some dependencies. "
+                      f"Install manually: pip install {' '.join(missing)}", file=sys.stderr)
+def main():
+    """Main CLI entry point."""
+    parser = argparse.ArgumentParser(
+        prog='code2logic',
+        description='Convert source code to logical representation for LLM analysis',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog='''
+Examples:
+  code2logic /path/to/project                    # Standard Markdown
+  code2logic /path/to/project -f csv             # CSV (best for LLM, ~50%% smaller)
+  code2logic /path/to/project -f yaml            # YAML (human-readable)
+  code2logic /path/to/project -f json --flat     # Flat JSON (for comparisons)
+  code2logic /path/to/project -f compact         # Ultra-compact text
+Output formats (token efficiency):
+  csv      - Best for LLM (~20K tokens/100 files) - flat table
+  compact  - Good for LLM (~25K tokens/100 files) - minimal text
+  json     - Standard (~35K tokens/100 files) - nested/flat
+  yaml     - Readable (~35K tokens/100 files) - nested/flat
+  markdown - Documentation (~55K tokens/100 files)
+Detail levels (columns in csv/json/yaml):
+  minimal  - path, type, name, signature (4 columns)
+  standard - + intent, category, domain, imports (8 columns)
+  full     - + calls, lines, complexity, hash (16 columns)
+'''
+    )
+    parser.add_argument(
+        'path',
+        nargs='?',
+        default=None,
+        help='Path to the project directory'
+    )
+    parser.add_argument(
+        '-f', '--format',
+        choices=['markdown', 'compact', 'json', 'yaml', 'csv', 'gherkin'],
+        default='markdown',
+        help='Output format (default: markdown)'
+    )
+    parser.add_argument(
+        '-d', '--detail',
+        choices=['minimal', 'standard', 'full'],
+        default='standard',
+        help='Detail level - columns to include (default: standard)'
+    )
+    parser.add_argument(
+        '-o', '--output',
+        help='Output file path (default: stdout)'
+    )
+    parser.add_argument(
+        '--flat',
+        action='store_true',
+        help='Use flat structure (for json/yaml) - better for comparisons'
+    )
+    parser.add_argument(
+        '--no-install',
+        action='store_true',
+        help='Skip auto-installation of dependencies'
+    )
+    parser.add_argument(
+        '--no-treesitter',
+        action='store_true',
+        help='Disable Tree-sitter (use fallback parser)'
+    )
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='Verbose output'
+    )
+    parser.add_argument(
+        '--version',
+        action='version',
+        version=f'%(prog)s {__version__}'
+    )
+    parser.add_argument(
+        '--status',
+        action='store_true',
+        help='Show library availability status and exit'
+    )
+    args = parser.parse_args()
+    # Auto-install dependencies unless disabled
+    if not args.no_install and not args.status:
+        ensure_dependencies()
+    # Import after potential installation
+    from .analyzer import ProjectAnalyzer, get_library_status
+    from .generators import (
+        MarkdownGenerator, CompactGenerator, JSONGenerator,
+        YAMLGenerator, CSVGenerator
+    )
+    from .gherkin import GherkinGenerator
+    # Status check
+    if args.status:
+        status = get_library_status()
+        print("Library Status:")
+        for lib, available in status.items():
+            symbol = "✓" if available else "✗"
+            print(f"  {lib}: {symbol}")
+        sys.exit(0)
+    # Path is required for analysis
+    if args.path is None:
+        print("Error: path is required", file=sys.stderr)
+        parser.print_help()
+        sys.exit(1)
+    # Validate path
+    if not os.path.exists(args.path):
+        print(f"Error: Path does not exist: {args.path}", file=sys.stderr)
+        sys.exit(1)
+    if not os.path.isdir(args.path):
+        print(f"Error: Path is not a directory: {args.path}", file=sys.stderr)
+        sys.exit(1)
+    # Analyze
+    if args.verbose:
+        print(f"Analyzing project: {args.path}", file=sys.stderr)
+    analyzer = ProjectAnalyzer(
+        args.path,
+        use_treesitter=not args.no_treesitter,
+        verbose=args.verbose
+    )
+    project = analyzer.analyze()
+    if args.verbose:
+        print(f"Found {project.total_files} files, {project.total_lines} lines", file=sys.stderr)
+    # Generate output
+    if args.format == 'markdown':
+        generator = MarkdownGenerator()
+        output = generator.generate(project, args.detail)
+    elif args.format == 'compact':
+        generator = CompactGenerator()
+        output = generator.generate(project)
+    elif args.format == 'json':
+        generator = JSONGenerator()
+        output = generator.generate(project, flat=args.flat, detail=args.detail)
+    elif args.format == 'yaml':
+        generator = YAMLGenerator()
+        output = generator.generate(project, flat=args.flat, detail=args.detail)
+    elif args.format == 'csv':
+        generator = CSVGenerator()
+        output = generator.generate(project, detail=args.detail)
+    elif args.format == 'gherkin':
+        generator = GherkinGenerator()
+        output = generator.generate(project, detail=args.detail)
+    # Write output
+    if args.output:
+        with open(args.output, 'w', encoding='utf-8') as f:
+            f.write(output)
+        if args.verbose:
+            print(f"Output written to: {args.output}", file=sys.stderr)
+    else:
+        print(output)
+if __name__ == '__main__':
+    main()