PyPI - code-analyser - Versions diffs - 0.1.0__py3-none-any.whl - Mend

code-analyser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

code_analyser-0.1.0.dist-info/METADATA +283 -0
code_analyser-0.1.0.dist-info/RECORD +34 -0
code_analyser-0.1.0.dist-info/WHEEL +4 -0
code_analyser-0.1.0.dist-info/licenses/LICENSE +21 -0
codelens/__init__.py +7 -0
codelens/__main__.py +19 -0
codelens/analyzers/__init__.py +30 -0
codelens/analyzers/base.py +139 -0
codelens/analyzers/manager.py +207 -0
codelens/analyzers/python_analyzer.py +344 -0
codelens/analyzers/similarity_analyzer.py +512 -0
codelens/api/__init__.py +1 -0
codelens/api/routes/__init__.py +1 -0
codelens/api/routes/analysis.py +441 -0
codelens/api/routes/reports.py +438 -0
codelens/api/routes/rubrics.py +349 -0
codelens/api/schemas.py +305 -0
codelens/cli.py +297 -0
codelens/core/__init__.py +1 -0
codelens/core/config.py +91 -0
codelens/db/__init__.py +1 -0
codelens/db/database.py +57 -0
codelens/main.py +111 -0
codelens/models/__init__.py +14 -0
codelens/models/assignments.py +105 -0
codelens/models/reports.py +172 -0
codelens/models/rubrics.py +76 -0
codelens/services/__init__.py +37 -0
codelens/services/batch_processor.py +508 -0
codelens/services/code_executor.py +310 -0
codelens/services/sandbox.py +375 -0
codelens/services/similarity_service.py +449 -0
codelens/utils/__init__.py +29 -0
codelens/utils/helpers.py +217 -0

codelens/cli.py ADDED Viewed

@@ -0,0 +1,297 @@
+"""
+Command-line interface for CodeLens batch processing
+"""
+import argparse
+import asyncio
+import json
+import sys
+from pathlib import Path
+from typing import Any
+import structlog
+from codelens.core.config import settings
+from codelens.services.batch_processor import BatchProcessingConfig, batch_processor
+from codelens.utils import calculate_grade_letter, format_file_size
+# Module-level structlog logger used by the CLI command handlers to record failures.
+logger = structlog.get_logger()
+async def process_directory_command(args: Any) -> int:
+    """Process a directory of code submissions"""
+    try:
+        print(f"CodeLens Batch Processor v{settings.version}")
+        print(f"Processing directory: {args.directory}")
+        print("-" * 50)
+        # Configure batch processor
+        config = BatchProcessingConfig(
+            parallel_processing=not args.sequential,
+            max_concurrent=args.max_concurrent,
+            skip_unsupported_files=not args.include_unsupported,
+            extract_student_info=args.extract_student_info,
+            default_language=args.language
+        )
+        # Override student ID patterns if provided
+        if args.student_id_patterns:
+            config.student_id_patterns = args.student_id_patterns.split(',')
+        # Create batch processor with config
+        processor = batch_processor.__class__(config)
+        # Process directory
+        result = await processor.process_directory(
+            directory_path=args.directory,
+            assignment_id=args.assignment_id,
+            rubric_id=args.rubric_id,
+            language=args.language
+        )
+        # Display results
+        print("\n" + "=" * 60)
+        print("BATCH PROCESSING RESULTS")
+        print("=" * 60)
+        print(f"Batch ID: {result.batch_id}")
+        print(f"Total Files: {result.total_files}")
+        print(f"Processed: {result.processed_files}")
+        print(f"Failed: {result.failed_files}")
+        print(f"Success Rate: {(result.processed_files / result.total_files * 100):.1f}%" if result.total_files > 0 else "0%")
+        print(f"Processing Time: {result.processing_time:.2f} seconds")
+        if result.average_score is not None:
+            print(f"Average Score: {result.average_score:.1f}%")
+            print(f"Average Grade: {calculate_grade_letter(result.average_score)}")
+        # Score distribution
+        if result.score_distribution:
+            print("\nScore Distribution:")
+            for grade_range, count in result.score_distribution.items():
+                percentage = (count / result.processed_files * 100) if result.processed_files > 0 else 0
+                print(f"  {grade_range}: {count} students ({percentage:.1f}%)")
+        # Errors
+        if result.errors:
+            print("\nErrors:")
+            for i, error in enumerate(result.errors[:10], 1):  # Show first 10 errors
+                print(f"  {i}. {error}")
+            if len(result.errors) > 10:
+                print(f"  ... and {len(result.errors) - 10} more errors")
+        # Detailed results
+        if args.detailed and result.results:
+            print("\n" + "-" * 60)
+            print("DETAILED RESULTS")
+            print("-" * 60)
+            for i, res in enumerate(result.results[:args.max_details], 1):
+                print(f"\n{i}. Submission: {res.submission_id}")
+                print(f"   Success: {res.success}")
+                if res.total_score is not None:
+                    print(f"   Score: {res.total_score:.1f}% ({calculate_grade_letter(res.total_score)})")
+                print(f"   Issues: {len(res.issues)}")
+                if res.metrics:
+                    print(f"   LOC: {res.metrics.lines_of_code}")
+                    print(f"   Complexity: {res.metrics.cyclomatic_complexity}")
+                if res.error_message:
+                    print(f"   Error: {res.error_message}")
+        # Save results to file if requested
+        if args.output:
+            output_data = {
+                "batch_id": result.batch_id,
+                "summary": {
+                    "total_files": result.total_files,
+                    "processed_files": result.processed_files,
+                    "failed_files": result.failed_files,
+                    "processing_time": result.processing_time,
+                    "average_score": result.average_score,
+                    "score_distribution": result.score_distribution
+                },
+                "results": [
+                    {
+                        "submission_id": r.submission_id,
+                        "success": r.success,
+                        "total_score": r.total_score,
+                        "issues_count": len(r.issues),
+                        "metrics": r.metrics.dict() if r.metrics else None,
+                        "error_message": r.error_message
+                    }
+                    for r in result.results
+                ],
+                "errors": result.errors
+            }
+            with open(args.output, 'w') as f:
+                json.dump(output_data, f, indent=2, default=str)
+            print(f"\nResults saved to: {args.output}")
+        return 0 if result.success else 1
+    except Exception as e:
+        logger.error("Directory processing failed", error=str(e))
+        print(f"Error: {str(e)}", file=sys.stderr)
+        return 1
+async def analyze_single_file(args: Any) -> int:
+    """Analyze a single code file"""
+    try:
+        file_path = Path(args.file)
+        if not file_path.exists():
+            print(f"Error: File {args.file} does not exist", file=sys.stderr)
+            return 1
+        print(f"Analyzing file: {file_path}")
+        print(f"File size: {format_file_size(file_path.stat().st_size)}")
+        print("-" * 40)
+        # Read file content
+        with open(file_path, encoding='utf-8') as f:
+            code = f.read()
+        # Process as single file batch
+        files_data = [{
+            "code": code,
+            "path": str(file_path),
+            "student_id": args.student_id,
+            "student_name": args.student_name
+        }]
+        result = await batch_processor.process_files_list(
+            files_data=files_data,
+            assignment_id=args.assignment_id,
+            rubric_id=args.rubric_id,
+            language=args.language
+        )
+        if result.results:
+            res = result.results[0]
+            print(f"Analysis completed in {result.processing_time:.2f} seconds")
+            print(f"Success: {res.success}")
+            if res.total_score is not None:
+                print(f"Score: {res.total_score:.1f}% ({calculate_grade_letter(res.total_score)})")
+            if res.metrics:
+                print("\nCode Metrics:")
+                print(f"  Lines of Code: {res.metrics.lines_of_code}")
+                print(f"  Cyclomatic Complexity: {res.metrics.cyclomatic_complexity}")
+                print(f"  Functions: {res.metrics.function_count}")
+                print(f"  Classes: {res.metrics.class_count}")
+            if res.issues:
+                print(f"\nIssues Found ({len(res.issues)}):")
+                for i, issue in enumerate(res.issues[:10], 1):  # Show first 10 issues
+                    print(f"  {i}. Line {issue.line}: {issue.message} ({issue.severity.value})")
+                if len(res.issues) > 10:
+                    print(f"  ... and {len(res.issues) - 10} more issues")
+            if res.error_message:
+                print(f"\nError: {res.error_message}")
+        return 0 if result.success else 1
+    except Exception as e:
+        logger.error("File analysis failed", error=str(e))
+        print(f"Error: {str(e)}", file=sys.stderr)
+        return 1
+def create_parser() -> argparse.ArgumentParser:
+    """Create command-line argument parser"""
+    parser = argparse.ArgumentParser(
+        description="CodeLens - Automated Code Analysis for Educational Use",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Process a directory of Python submissions
+  python -m codelens.cli batch /path/to/submissions --language python
+  # Process with specific rubric and assignment
+  python -m codelens.cli batch /path/to/submissions --rubric-id 1 --assignment-id 5
+  # Analyze a single file
+  python -m codelens.cli analyze submission.py --student-id cs123456
+  # Generate detailed report with output file
+  python -m codelens.cli batch /submissions --detailed --output results.json
+        """
+    )
+    subparsers = parser.add_subparsers(dest='command', help='Available commands')
+    # Batch processing command
+    batch_parser = subparsers.add_parser('batch', help='Process directory of code submissions')
+    batch_parser.add_argument('directory', help='Directory containing code submissions')
+    batch_parser.add_argument('--language', default='python',
+                            help='Programming language (default: python)')
+    batch_parser.add_argument('--assignment-id', type=int,
+                            help='Assignment ID for database storage')
+    batch_parser.add_argument('--rubric-id', type=int,
+                            help='Rubric ID for grading')
+    batch_parser.add_argument('--sequential', action='store_true',
+                            help='Process files sequentially instead of parallel')
+    batch_parser.add_argument('--max-concurrent', type=int, default=5,
+                            help='Maximum concurrent processing (default: 5)')
+    batch_parser.add_argument('--include-unsupported', action='store_true',
+                            help='Include unsupported file types')
+    batch_parser.add_argument('--no-extract-student-info', dest='extract_student_info',
+                            action='store_false', default=True,
+                            help='Disable automatic student info extraction')
+    batch_parser.add_argument('--student-id-patterns',
+                            help='Comma-separated regex patterns for student ID extraction')
+    batch_parser.add_argument('--detailed', action='store_true',
+                            help='Show detailed results for each submission')
+    batch_parser.add_argument('--max-details', type=int, default=20,
+                            help='Maximum detailed results to show (default: 20)')
+    batch_parser.add_argument('--output', '-o',
+                            help='Output file for results (JSON format)')
+    # Single file analysis command
+    analyze_parser = subparsers.add_parser('analyze', help='Analyze a single code file')
+    analyze_parser.add_argument('file', help='Code file to analyze')
+    analyze_parser.add_argument('--language', default='python',
+                              help='Programming language (default: python)')
+    analyze_parser.add_argument('--student-id', help='Student ID')
+    analyze_parser.add_argument('--student-name', help='Student name')
+    analyze_parser.add_argument('--assignment-id', type=int,
+                              help='Assignment ID for database storage')
+    analyze_parser.add_argument('--rubric-id', type=int,
+                              help='Rubric ID for grading')
+    return parser
+async def main() -> int:
+    """Main CLI entry point"""
+    parser = create_parser()
+    args = parser.parse_args()
+    if not args.command:
+        parser.print_help()
+        return 1
+    if args.command == 'batch':
+        return await process_directory_command(args)
+    elif args.command == 'analyze':
+        return await analyze_single_file(args)
+    else:
+        parser.print_help()
+        return 1
+if __name__ == "__main__":
+    try:
+        exit_code = asyncio.run(main())
+        sys.exit(exit_code)
+    except KeyboardInterrupt:
+        print("\nInterrupted by user", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"Unexpected error: {str(e)}", file=sys.stderr)
+        sys.exit(1)

codelens/core/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Core application components"""

codelens/core/config.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""
+Application configuration management
+"""
+from pydantic import BaseModel, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+class AnalyzerConfig(BaseModel):
+    """Configuration for code analysis tools.
+    Plain pydantic model; an instance is embedded in ``Settings`` as ``analyzer``.
+    """
+    # Python analyzers: on/off switch plus an optional config value per tool.
+    # NOTE(review): the *_config values are presumably paths to config files - confirm
+    # against the analyzer implementation.
+    ruff_enabled: bool = True
+    ruff_config: str | None = None
+    mypy_enabled: bool = True
+    mypy_config: str | None = None
+    # Analysis options
+    max_complexity: int = 10
+    max_line_length: int = 88
+    check_type_hints: bool = True
+    check_docstrings: bool = True
+    # Execution limits
+    execution_timeout: int = 30  # seconds
+    memory_limit: str = "128m"  # Docker memory limit
+    cpu_limit: str = "0.5"  # Docker CPU limit
+class SimilarityConfig(BaseModel):
+    """Configuration for similarity detection.
+    Plain pydantic model; an instance is embedded in ``Settings`` as ``similarity``.
+    """
+    enabled: bool = True
+    threshold: float = 0.8  # Similarity threshold for flagging
+    # Names of the comparison methods to run.
+    # NOTE(review): the set of accepted names is defined by the similarity analyzer - confirm.
+    methods: list[str] = ["ast_structural", "token_based"]
+    # AI-generated baseline comparison
+    use_ai_baselines: bool = True
+    ai_baseline_count: int = 5
+class DatabaseConfig(BaseModel):
+    """Database configuration.
+    Plain pydantic model; an instance is embedded in ``Settings`` as ``database``.
+    """
+    # SQLAlchemy connection URL; the default is a local SQLite file accessed through
+    # the aiosqlite async driver.
+    url: str = "sqlite+aiosqlite:///./codelens.db"
+    echo: bool = False  # SQL logging
+    # Connection-pool sizing values.
+    pool_size: int = 5
+    max_overflow: int = 10
+class Settings(BaseSettings):
+    """Application settings.
+    A pydantic-settings model: every field below has a default, and ``model_config``
+    additionally points at a ``.env`` file (UTF-8) with case-insensitive name matching.
+    """
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        case_sensitive=False
+    )
+    # Application
+    app_name: str = "CodeLens"
+    debug: bool = False
+    version: str = "0.1.0"
+    # API
+    api_prefix: str = "/api/v1"
+    host: str = "localhost"
+    port: int = 8000
+    docs_enabled: bool = True  # Always enable Swagger docs for educational tool
+    # Security
+    # NOTE(review): the default is a placeholder, as its own text says; it must be
+    # overridden before any production deployment.
+    secret_key: str = Field(default="your-secret-key-change-in-production")
+    access_token_expire_minutes: int = 30
+    # Analysis configuration (nested models; each gets a fresh default instance)
+    analyzer: AnalyzerConfig = Field(default_factory=AnalyzerConfig)
+    similarity: SimilarityConfig = Field(default_factory=SimilarityConfig)
+    database: DatabaseConfig = Field(default_factory=DatabaseConfig)
+    # Docker settings
+    docker_enabled: bool = True
+    docker_image: str = "python:3.11-slim"
+    # File limits
+    max_file_size: int = 1024 * 1024  # 1MB
+    max_files_per_batch: int = 100
+# Global settings instance, created once when this module is first imported
+settings = Settings()

codelens/db/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Database configuration and connection"""

codelens/db/database.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""
+Database configuration and connection management
+"""
+from typing import AsyncGenerator
+import structlog
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+from sqlalchemy.orm import DeclarativeBase
+from codelens.core.config import settings
+# Module-level structlog logger for database events.
+logger = structlog.get_logger()
+# Create async engine; URL, SQL echo flag and pool sizing all come from settings.database
+engine = create_async_engine(
+    settings.database.url,
+    echo=settings.database.echo,
+    pool_size=settings.database.pool_size,
+    max_overflow=settings.database.max_overflow,
+)
+# Create async session maker bound to the engine above.
+# expire_on_commit=False: objects are not expired when the session commits.
+AsyncSessionLocal = async_sessionmaker(
+    bind=engine,
+    class_=AsyncSession,
+    expire_on_commit=False,
+)
+class Base(DeclarativeBase):
+    """Base class for all database models"""
+    pass
+async def get_db() -> AsyncGenerator[AsyncSession, None]:
+    """Dependency to get database session"""
+    async with AsyncSessionLocal() as session:
+        try:
+            yield session
+        except Exception as e:
+            logger.error("Database session error", error=str(e))
+            await session.rollback()
+            raise
+        finally:
+            await session.close()
+async def init_db() -> None:
+    """Initialize database tables"""
+    async with engine.begin() as conn:
+        # Import all models to ensure they're registered
+        # Create all tables
+        await conn.run_sync(Base.metadata.create_all)
+    logger.info("Database tables initialized")

codelens/main.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""
+FastAPI application entry point
+"""
+from typing import Any
+import structlog
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from codelens.api.routes import analysis, reports, rubrics
+from codelens.core.config import settings
+from codelens.db.database import init_db
+# Configure structured logging.
+# This runs at import time of this module. Processors are applied in the listed order;
+# the final JSONRenderer means each log event is rendered as a JSON document.
+structlog.configure(
+    processors=[
+        structlog.stdlib.filter_by_level,
+        structlog.stdlib.add_logger_name,
+        structlog.stdlib.add_log_level,
+        structlog.stdlib.PositionalArgumentsFormatter(),
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.processors.StackInfoRenderer(),
+        structlog.processors.format_exc_info,
+        structlog.processors.UnicodeDecoder(),
+        structlog.processors.JSONRenderer()
+    ],
+    context_class=dict,
+    # Loggers are created through the standard-library logging integration.
+    logger_factory=structlog.stdlib.LoggerFactory(),
+    cache_logger_on_first_use=True,
+)
+# Module-level logger used by the application lifecycle handlers below.
+logger = structlog.get_logger()
+def create_app() -> FastAPI:
+    """Create FastAPI application"""
+    app = FastAPI(
+        title=settings.app_name,
+        description="Automated Code Analysis & Grading Assistant for Educators",
+        version=settings.version,
+        debug=settings.debug,
+        openapi_url="/openapi.json" if settings.docs_enabled else None,
+        docs_url="/docs" if settings.docs_enabled else None,
+        redoc_url="/redoc" if settings.docs_enabled else None,
+    )
+    # CORS middleware
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"] if settings.debug else [],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    # Include routers
+    app.include_router(
+        analysis.router,
+        prefix=f"{settings.api_prefix}/analyze",
+        tags=["analysis"]
+    )
+    app.include_router(
+        rubrics.router,
+        prefix=f"{settings.api_prefix}/rubrics",
+        tags=["rubrics"]
+    )
+    app.include_router(
+        reports.router,
+        prefix=f"{settings.api_prefix}/reports",
+        tags=["reports"]
+    )
+    @app.on_event("startup")
+    async def startup_event() -> None:
+        """Initialize application on startup"""
+        logger.info("Starting CodeLens application")
+        await init_db()
+        logger.info("Database initialized")
+    @app.on_event("shutdown")
+    async def shutdown_event() -> None:
+        """Cleanup on shutdown"""
+        logger.info("Shutting down CodeLens application")
+    @app.get("/health")
+    async def health_check() -> dict[str, Any]:
+        """Health check endpoint"""
+        return {
+            "status": "healthy",
+            "version": settings.version,
+            "app": settings.app_name
+        }
+    return app
+# Create app instance at import time; this is the object the "codelens.main:app"
+# import string below refers to.
+app = create_app()
+if __name__ == "__main__":
+    # Running this module directly starts a uvicorn server with host and port taken
+    # from settings; auto-reload follows the debug flag.
+    import uvicorn
+    uvicorn.run(
+        "codelens.main:app",
+        host=settings.host,
+        port=settings.port,
+        reload=settings.debug,
+        log_config=None,  # Use structlog configuration
+    )

codelens/models/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""Database models"""
+from .assignments import Assignment, TestCase
+from .reports import AnalysisReport, SimilarityMatch
+from .rubrics import Rubric, RubricCriterion
+# Public names re-exported by the models package (the six classes imported above).
+__all__ = [
+    "Rubric",
+    "RubricCriterion",
+    "Assignment",
+    "TestCase",
+    "AnalysisReport",
+    "SimilarityMatch",
+]

codelens/models/assignments.py ADDED Viewed

@@ -0,0 +1,105 @@
+"""
+Database models for assignments and specifications
+"""
+from datetime import datetime
+from typing import TYPE_CHECKING
+from sqlalchemy import JSON, Boolean, DateTime, ForeignKey, Integer, String, Text
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+from sqlalchemy.sql import func
+from codelens.db.database import Base
+if TYPE_CHECKING:
+    from .reports import AnalysisReport
+    from .rubrics import Rubric
+class Assignment(Base):
+    """Assignment specification and requirements.
+    ORM model mapped to the ``assignments`` table. Each assignment references exactly
+    one rubric (``rubric_id`` is non-nullable) and owns a collection of analysis reports.
+    """
+    __tablename__ = "assignments"
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
+    name: Mapped[str] = mapped_column(String(200), nullable=False, index=True)
+    description: Mapped[str] = mapped_column(Text, nullable=False)
+    # Course information (all optional)
+    course_id: Mapped[str | None] = mapped_column(String(50), nullable=True, index=True)
+    course_name: Mapped[str | None] = mapped_column(String(200), nullable=True)
+    semester: Mapped[str | None] = mapped_column(String(20), nullable=True)
+    # Assignment configuration
+    language: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
+    # Required foreign key to rubrics.id
+    rubric_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("rubrics.id"), nullable=False, index=True
+    )
+    # Requirements and specifications (stored as JSON)
+    # NOTE(review): the JSON schema of these columns is not defined in this module - confirm with callers.
+    requirements: Mapped[dict] = mapped_column(JSON, nullable=False)  # Technical requirements
+    test_cases: Mapped[dict | None] = mapped_column(JSON, nullable=True)  # Expected outputs
+    starter_code: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Similarity checking configuration
+    similarity_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
+    similarity_threshold: Mapped[float] = mapped_column(nullable=False, default=0.8)
+    cross_cohort_check: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    # AI baseline configuration
+    ai_baselines: Mapped[dict | None] = mapped_column(JSON, nullable=True)  # Generated code variants
+    # Deadlines
+    due_date: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    late_penalty: Mapped[float | None] = mapped_column(nullable=True, default=0.0)  # Per day penalty
+    # Metadata: both timestamps default to the database server's now();
+    # updated_at is additionally refreshed on every UPDATE.
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+        nullable=False
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+        onupdate=func.now(),
+        nullable=False
+    )
+    # Relationships
+    rubric: Mapped["Rubric"] = relationship("Rubric", back_populates="assignments")
+    # cascade="all, delete-orphan": reports are deleted together with their assignment
+    # and when removed from this collection.
+    reports: Mapped[list["AnalysisReport"]] = relationship(
+        "AnalysisReport", back_populates="assignment", cascade="all, delete-orphan"
+    )
+class TestCase(Base):
+    """Test cases for assignment validation"""
+    __tablename__ = "test_cases"
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
+    assignment_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("assignments.id"), nullable=False, index=True
+    )
+    # Test case details
+    name: Mapped[str] = mapped_column(String(100), nullable=False)
+    description: Mapped[str | None] = mapped_column(Text, nullable=True)
+    test_type: Mapped[str] = mapped_column(String(50), nullable=False)  # unit, integration, etc
+    # Test configuration
+    input_data: Mapped[dict | None] = mapped_column(JSON, nullable=True)
+    expected_output: Mapped[str | None] = mapped_column(Text, nullable=True)
+    test_code: Mapped[str | None] = mapped_column(Text, nullable=True)  # Custom test code
+    # Scoring
+    points: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
+    required: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
+    # Metadata
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+        nullable=False
+    )