PyPI - code2llm - Versions diffs - 0.5.51__tar.gz → 0.5.52__tar.gz - Mend

code2llm 0.5.51__tar.gz → 0.5.52__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120) hide show

{code2llm-0.5.51 → code2llm-0.5.52}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code2llm
-Version: 0.5.51
+Version: 0.5.52
 Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
 Home-page: https://github.com/wronai/stts
 Author: STTS Project

{code2llm-0.5.51 → code2llm-0.5.52}/code2llm/__init__.py RENAMED Viewed

@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
 and entity resolution with multilingual support.
 """
-__version__ = "0.5.51"
+__version__ = "0.5.52"
 __author__ = "STTS Project"
 # Core analysis components (lightweight, always needed)

{code2llm-0.5.51 → code2llm-0.5.52}/code2llm/analysis/data_analysis.py RENAMED Viewed

@@ -4,6 +4,35 @@ from typing import Any, Dict, List
 from ..core.models import AnalysisResult
+_INPUT_INDICATORS = ['parse', 'load', 'read', 'fetch', 'get', 'input', 'receive', 'extract']
+_TRANSFORM_INDICATORS = ['transform', 'convert', 'process', 'validate', 'filter', 'map', 'reduce', 'compute']
+_OUTPUT_INDICATORS = ['serialize', 'format', 'write', 'save', 'send', 'output', 'render', 'encode']
+_MAX_PIPELINES = 15
+def _categorize_functions(result: 'AnalysisResult'):
+    """Categorize functions into input/transform/output based on name patterns."""
+    input_funcs, transform_funcs, output_funcs = [], [], []
+    for func_name, func in result.functions.items():
+        name_lower = func.name.lower()
+        if any(ind in name_lower for ind in _INPUT_INDICATORS):
+            input_funcs.append((func_name, func))
+        elif any(ind in name_lower for ind in _TRANSFORM_INDICATORS):
+            transform_funcs.append((func_name, func))
+        elif any(ind in name_lower for ind in _OUTPUT_INDICATORS):
+            output_funcs.append((func_name, func))
+    return input_funcs, transform_funcs, output_funcs
+def _make_stage(label: str, func_name: str, func) -> Dict[str, str]:
+    """Build a single pipeline stage dict."""
+    return {
+        'stage': label,
+        'function': func_name,
+        'description': func.docstring[:100] if func.docstring else 'N/A',
+    }
 class DataAnalyzer:
     """Analyze data flows, structures, and optimization opportunities."""
@@ -32,40 +61,27 @@ class DataAnalyzer:
     def _find_data_pipelines(self, result: AnalysisResult) -> list:
         """Find data transformation pipelines in the codebase."""
+        input_funcs, transform_funcs, output_funcs = _categorize_functions(result)
         pipelines = []
-        input_indicators = ['parse', 'load', 'read', 'fetch', 'get', 'input', 'receive', 'extract']
-        transform_indicators = ['transform', 'convert', 'process', 'validate', 'filter', 'map', 'reduce', 'compute']
-        output_indicators = ['serialize', 'format', 'write', 'save', 'send', 'output', 'render', 'encode']
-        input_funcs = []
-        transform_funcs = []
-        output_funcs = []
-        for func_name, func in result.functions.items():
-            name_lower = func.name.lower()
-            if any(ind in name_lower for ind in input_indicators):
-                input_funcs.append((func_name, func))
-            elif any(ind in name_lower for ind in transform_indicators):
-                transform_funcs.append((func_name, func))
-            elif any(ind in name_lower for ind in output_indicators):
-                output_funcs.append((func_name, func))
         for in_name, in_func in input_funcs[:20]:
             for t_name, t_func in transform_funcs[:30]:
-                if t_name in in_func.calls:
-                    for out_name, out_func in output_funcs[:20]:
-                        if out_name in t_func.calls:
-                            pipelines.append({
-                                'pipeline_id': f"pipeline_{len(pipelines)+1}",
-                                'stages': [
-                                    {'stage': 'input', 'function': in_name, 'description': in_func.docstring[:100] if in_func.docstring else 'N/A'},
-                                    {'stage': 'transform', 'function': t_name, 'description': t_func.docstring[:100] if t_func.docstring else 'N/A'},
-                                    {'stage': 'output', 'function': out_name, 'description': out_func.docstring[:100] if out_func.docstring else 'N/A'},
-                                ],
-                                'data_flow': f"{in_name} → {t_name} → {out_name}",
-                            })
-                            if len(pipelines) >= 15:
-                                return pipelines
+                if t_name not in in_func.calls:
+                    continue
+                for out_name, out_func in output_funcs[:20]:
+                    if out_name not in t_func.calls:
+                        continue
+                    pipelines.append({
+                        'pipeline_id': f"pipeline_{len(pipelines)+1}",
+                        'stages': [
+                            _make_stage('input', in_name, in_func),
+                            _make_stage('transform', t_name, t_func),
+                            _make_stage('output', out_name, out_func),
+                        ],
+                        'data_flow': f"{in_name} → {t_name} → {out_name}",
+                    })
+                    if len(pipelines) >= _MAX_PIPELINES:
+                        return pipelines
         return pipelines
     def _find_state_patterns(self, result: AnalysisResult) -> list:

{code2llm-0.5.51 → code2llm-0.5.52}/code2llm/analysis/type_inference.py RENAMED Viewed

@@ -17,6 +17,15 @@ from ..core.models import FunctionInfo
 logger = logging.getLogger(__name__)
+# Arg name substring -> inferred type (checked in order)
+ARG_NAME_TYPE_MAP = [
+    ("path", "Path"),
+    ("name", "str"),
+    ("text", "str"),
+    ("config", "Config"),
+    ("result", "AnalysisResult"),
+]
 # Name pattern -> (consumed types, produced types)
 NAME_PATTERNS: List[Tuple[List[str], List[str], List[str]]] = [
     # (name_contains, consumed, produced)
@@ -282,26 +291,10 @@ class TypeInferenceEngine:
                 break
         # Build arg list with inferred types
-        args = []
-        for arg_name in fi.args:
-            inferred_type = None
-            if arg_name == "self":
-                inferred_type = None
-            elif consumed:
-                inferred_type = consumed[0] if consumed else None
-            elif "path" in arg_name.lower():
-                inferred_type = "Path"
-            elif "name" in arg_name.lower() or "text" in arg_name.lower():
-                inferred_type = "str"
-            elif "config" in arg_name.lower():
-                inferred_type = "Config"
-            elif "result" in arg_name.lower():
-                inferred_type = "AnalysisResult"
-            args.append({
-                "name": arg_name,
-                "type": inferred_type,
-                "has_default": False,
-            })
+        args = [
+            {"name": a, "type": self._infer_arg_type(a, consumed), "has_default": False}
+            for a in fi.args
+        ]
         ret = produced[0] if produced else None
         has_any = ret is not None or any(a["type"] for a in args)
@@ -313,3 +306,16 @@ class TypeInferenceEngine:
             "name": fi.name,
             "qualified_name": fi.qualified_name,
         }
+    @staticmethod
+    def _infer_arg_type(arg_name: str, consumed: List[str]) -> Optional[str]:
+        """Infer type for a single argument from consumed types or name patterns."""
+        if arg_name == "self":
+            return None
+        if consumed:
+            return consumed[0]
+        arg_lower = arg_name.lower()
+        for pattern, typ in ARG_NAME_TYPE_MAP:
+            if pattern in arg_lower:
+                return typ
+        return None

code2llm-0.5.52/code2llm/cli.py ADDED Viewed

@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+"""
+code2llm - CLI for Python code flow analysis
+Analyze control flow, data flow, and call graphs of Python codebases.
+"""
+import sys
+from .cli_parser import create_parser
+from .cli_commands import (
+    handle_special_commands, validate_and_setup, print_start_info,
+    validate_chunked_output,
+    # Backward compatibility aliases
+    handle_report_command as _handle_report_command,
+    generate_llm_context,
+)
+from .cli_exports import (
+    _export_evolution, _export_data_structures, _export_context_fallback,
+    _export_readme, _export_code2logic, _export_prompt_txt, _run_exports,
+    _export_simple_formats, _export_yaml, _export_mermaid, _export_refactor_prompts,
+    _export_project_yaml, _run_report,
+)
+from .cli_analysis import _run_analysis
+# Backward compatibility aliases
+_handle_special_commands = handle_special_commands
+_validate_and_setup = validate_and_setup
+_print_start_info = print_start_info
+_validate_chunked_output = validate_chunked_output
+def main():
+    """Main CLI entry point."""
+    # Handle special sub-commands first
+    special_result = handle_special_commands()
+    if special_result is not None:
+        return special_result
+    # Parse arguments
+    parser = create_parser()
+    args = parser.parse_args()
+    source_path, output_dir = validate_and_setup(args)
+    print_start_info(args, source_path, output_dir)
+    # Validate mode - only check existing output
+    if args.validate:
+        is_valid = validate_chunked_output(output_dir, args)
+        return 0 if is_valid else 1
+    # Analyze → Export
+    result = _run_analysis(args, source_path, output_dir)
+    _run_exports(args, result, output_dir, source_path=source_path)
+    # Auto-validate after chunked analysis
+    if args.chunk and args.verbose:
+        print(f"\n🔍 Auto-validating chunked output...")
+        validate_chunked_output(output_dir, args)
+    if args.verbose:
+        print(f"\nAll outputs saved to: {output_dir}")
+    return 0
+if __name__ == '__main__':
+    sys.exit(main())

code2llm-0.5.52/code2llm/cli_commands.py ADDED Viewed

@@ -0,0 +1,220 @@
+"""CLI subcommands and validation for code2llm."""
+import argparse
+import sys
+from pathlib import Path
+from typing import Optional
+from .cli_exports import _run_report
+def handle_special_commands() -> Optional[int]:
+    """Handle special sub-commands (llm-flow, llm-context, report)."""
+    if len(sys.argv) > 1 and sys.argv[1] == 'llm-flow':
+        from .generators.llm_flow import main as llm_flow_main
+        return llm_flow_main(sys.argv[2:])
+    if len(sys.argv) > 1 and sys.argv[1] == 'llm-context':
+        return generate_llm_context(sys.argv[2:])
+    if len(sys.argv) > 1 and sys.argv[1] == 'report':
+        return handle_report_command(sys.argv[2:])
+    return None
+def handle_report_command(args_list) -> int:
+    """Generate views from an existing project.yaml.
+    Usage:
+        code2llm report --format toon    # → project.toon
+        code2llm report --format context # → context.md
+        code2llm report --format article # → status.md
+        code2llm report --format html    # → dashboard.html
+        code2llm report --format all     # → all views
+    """
+    import argparse
+    parser = argparse.ArgumentParser(
+        prog='code2llm report',
+        description='Generate views from project.yaml (single source of truth)',
+    )
+    parser.add_argument(
+        '--input', '-i',
+        default='./project.yaml',
+        help='Path to project.yaml (default: ./project.yaml)',
+    )
+    parser.add_argument(
+        '--format', '-f',
+        dest='report_format',
+        default='all',
+        help='Output format: toon, context, article, html, all (default: all)',
+    )
+    parser.add_argument(
+        '-o', '--output',
+        default='.',
+        help='Output directory (default: current directory)',
+    )
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='Verbose output',
+    )
+    args = parser.parse_args(args_list)
+    input_path = Path(args.input)
+    if not input_path.exists():
+        print(f"Error: project.yaml not found: {input_path}", file=sys.stderr)
+        print("Run 'code2llm <source> -f project-yaml' first to generate it.", file=sys.stderr)
+        return 1
+    output_dir = Path(args.output)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    if args.verbose:
+        print(f"Generating views from: {input_path}")
+        print(f"Output directory: {output_dir}")
+    _run_report(args, str(input_path), output_dir)
+    if args.verbose:
+        print(f"\nAll views saved to: {output_dir}")
+    return 0
+def validate_and_setup(args) -> tuple[Path, Path]:
+    """Validate source path and setup output directory."""
+    if not args.source:
+        print("Error: missing required argument: source", file=sys.stderr)
+        print("Usage: code2llm <source> [options]", file=sys.stderr)
+        print("   or: code2llm llm-flow [options]", file=sys.stderr)
+        sys.exit(2)
+    source_path = Path(args.source)
+    if not source_path.exists():
+        print(f"Error: Source path not found: {source_path}", file=sys.stderr)
+        sys.exit(1)
+    output_dir = Path(args.output)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    return source_path, output_dir
+def print_start_info(args, source_path: Path, output_dir: Path) -> None:
+    """Print analysis start information if verbose."""
+    if args.verbose:
+        print(f"Analyzing: {source_path}")
+        print(f"Mode: {args.mode}")
+        print(f"Output: {output_dir}")
+def validate_chunked_output(output_dir: Path, args) -> bool:
+    """Validate generated chunked output.
+    Checks:
+    1. All chunks have required files (analysis.toon, context.md, evolution.toon)
+    2. Files are not empty
+    3. Report summary
+    Returns True if valid, False otherwise.
+    """
+    if not output_dir.exists():
+        print(f"✗ Output directory does not exist: {output_dir}", file=sys.stderr)
+        return False
+    # Find all chunk directories
+    chunk_dirs = [d for d in output_dir.iterdir() if d.is_dir()]
+    if not chunk_dirs:
+        print(f"✗ No chunk directories found in: {output_dir}", file=sys.stderr)
+        return False
+    required_files = ['analysis.toon', 'context.md', 'evolution.toon']
+    issues = []
+    valid_chunks = []
+    print(f"\n🔍 Validating {len(chunk_dirs)} chunks in: {output_dir}")
+    print("-" * 50)
+    for chunk_dir in sorted(chunk_dirs):
+        chunk_name = chunk_dir.name
+        chunk_issues = []
+        for req_file in required_files:
+            file_path = chunk_dir / req_file
+            if not file_path.exists():
+                chunk_issues.append(f"  missing {req_file}")
+            elif file_path.stat().st_size == 0:
+                chunk_issues.append(f"  empty {req_file}")
+        if chunk_issues:
+            issues.append((chunk_name, chunk_issues))
+            print(f"✗ {chunk_name}")
+            for issue in chunk_issues:
+                print(f"    {issue}")
+        else:
+            # Get file sizes
+            sizes = []
+            for req_file in required_files:
+                size = (chunk_dir / req_file).stat().st_size
+                sizes.append(f"{req_file}:{size//1024}KB" if size > 1024 else f"{req_file}:{size}B")
+            valid_chunks.append(chunk_name)
+            print(f"✓ {chunk_name} ({', '.join(sizes)})")
+    print("-" * 50)
+    print(f"\n📊 Validation Summary:")
+    print(f"  Total chunks: {len(chunk_dirs)}")
+    print(f"  Valid: {len(valid_chunks)}")
+    print(f"  Issues: {len(issues)}")
+    if issues:
+        print(f"\n⚠️  {len(issues)} chunk(s) have issues:")
+        for chunk_name, chunk_issues in issues:
+            print(f"    - {chunk_name}")
+        return False
+    else:
+        print(f"\n✅ All {len(valid_chunks)} chunks are valid!")
+        return True
+def generate_llm_context(args_list):
+    """Quick command to generate LLM context only."""
+    import argparse
+    parser = argparse.ArgumentParser(
+        prog='code2llm llm-context',
+        description='Generate LLM-friendly context for a project'
+    )
+    parser.add_argument('source', help='Path to Python project')
+    parser.add_argument('-o', '--output', default='./llm_context.md', help='Output file path')
+    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose output')
+    args = parser.parse_args(args_list)
+    from pathlib import Path
+    from . import ProjectAnalyzer, FAST_CONFIG
+    from .exporters import ContextExporter
+    source_path = Path(args.source)
+    if not source_path.exists():
+        print(f"Error: Source path not found: {source_path}", file=sys.stderr)
+        return 1
+    if args.verbose:
+        print(f"Generating LLM context for: {source_path}")
+    # Use fast config with parallel disabled for stability
+    FAST_CONFIG.performance.parallel_enabled = False
+    analyzer = ProjectAnalyzer(FAST_CONFIG)
+    result = analyzer.analyze_project(str(source_path))
+    exporter = ContextExporter()
+    exporter.export(result, args.output)
+    # Print summary
+    print(f"\n✓ LLM context generated: {args.output}")
+    print(f"  Functions: {len(result.functions)}")
+    print(f"  Classes: {len(result.classes)}")
+    print(f"  Modules: {len(result.modules)}")
+    return 0

code2llm 0.5.51__tar.gz → 0.5.52__tar.gz

code2llm 0.5.51__tar.gz → 0.5.52__tar.gz