code-analyser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. code_analyser-0.1.0/.dockerignore +66 -0
  2. code_analyser-0.1.0/.env.example +143 -0
  3. code_analyser-0.1.0/CODE-LENS.md +223 -0
  4. code_analyser-0.1.0/DEPLOYMENT.md +358 -0
  5. code_analyser-0.1.0/Dockerfile +56 -0
  6. code_analyser-0.1.0/LICENSE +21 -0
  7. code_analyser-0.1.0/PKG-INFO +283 -0
  8. code_analyser-0.1.0/README.md +244 -0
  9. code_analyser-0.1.0/codelens/__init__.py +7 -0
  10. code_analyser-0.1.0/codelens/__main__.py +19 -0
  11. code_analyser-0.1.0/codelens/analyzers/__init__.py +30 -0
  12. code_analyser-0.1.0/codelens/analyzers/base.py +139 -0
  13. code_analyser-0.1.0/codelens/analyzers/manager.py +207 -0
  14. code_analyser-0.1.0/codelens/analyzers/python_analyzer.py +344 -0
  15. code_analyser-0.1.0/codelens/analyzers/similarity_analyzer.py +512 -0
  16. code_analyser-0.1.0/codelens/api/__init__.py +1 -0
  17. code_analyser-0.1.0/codelens/api/routes/__init__.py +1 -0
  18. code_analyser-0.1.0/codelens/api/routes/analysis.py +441 -0
  19. code_analyser-0.1.0/codelens/api/routes/reports.py +438 -0
  20. code_analyser-0.1.0/codelens/api/routes/rubrics.py +349 -0
  21. code_analyser-0.1.0/codelens/api/schemas.py +305 -0
  22. code_analyser-0.1.0/codelens/cli.py +297 -0
  23. code_analyser-0.1.0/codelens/core/__init__.py +1 -0
  24. code_analyser-0.1.0/codelens/core/config.py +91 -0
  25. code_analyser-0.1.0/codelens/db/__init__.py +1 -0
  26. code_analyser-0.1.0/codelens/db/database.py +57 -0
  27. code_analyser-0.1.0/codelens/main.py +111 -0
  28. code_analyser-0.1.0/codelens/models/__init__.py +14 -0
  29. code_analyser-0.1.0/codelens/models/assignments.py +105 -0
  30. code_analyser-0.1.0/codelens/models/reports.py +172 -0
  31. code_analyser-0.1.0/codelens/models/rubrics.py +76 -0
  32. code_analyser-0.1.0/codelens/services/__init__.py +37 -0
  33. code_analyser-0.1.0/codelens/services/batch_processor.py +508 -0
  34. code_analyser-0.1.0/codelens/services/code_executor.py +310 -0
  35. code_analyser-0.1.0/codelens/services/sandbox.py +375 -0
  36. code_analyser-0.1.0/codelens/services/similarity_service.py +449 -0
  37. code_analyser-0.1.0/codelens/utils/__init__.py +29 -0
  38. code_analyser-0.1.0/codelens/utils/helpers.py +217 -0
  39. code_analyser-0.1.0/docker-compose.prod.yml +91 -0
  40. code_analyser-0.1.0/docker-compose.yml +58 -0
  41. code_analyser-0.1.0/nginx.conf +184 -0
  42. code_analyser-0.1.0/pyproject.toml +120 -0
  43. code_analyser-0.1.0/scripts/deploy/README.md +272 -0
  44. code_analyser-0.1.0/scripts/deploy/docker_deploy.sh +118 -0
  45. code_analyser-0.1.0/scripts/deploy/server_install.sh +204 -0
  46. code_analyser-0.1.0/scripts/deploy/server_update.sh +85 -0
  47. code_analyser-0.1.0/uv.lock +1671 -0
@@ -0,0 +1,66 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ venv/
9
+ .venv/
10
+ ENV/
11
+ env.bak/
12
+ venv.bak/
13
+
14
+ # Testing
15
+ .pytest_cache/
16
+ .coverage
17
+ htmlcov/
18
+ .tox/
19
+ .nox/
20
+
21
+ # IDE
22
+ .vscode/
23
+ .idea/
24
+ *.swp
25
+ *.swo
26
+
27
+ # OS
28
+ .DS_Store
29
+ .DS_Store?
30
+ ._*
31
+ .Spotlight-V100
32
+ .Trashes
33
+ ehthumbs.db
34
+ Thumbs.db
35
+
36
+ # Project specific
37
+ *.log
38
+ logs/
39
+ temp/
40
+ uploads/
41
+ data/
42
+ backups/
43
+ *.db
44
+ *.sqlite
45
+ *.sqlite3
46
+
47
+ # Git
48
+ .git/
49
+ .gitignore
50
+
51
+ # Docker
52
+ .dockerignore
53
+ Dockerfile*
54
+ docker-compose*.yml
55
+
56
+ # Documentation (keep README.md for build)
57
+ DEPLOYMENT.md
58
+ CODE-LENS.md
59
+ docs/
60
+
61
+ # Development
62
+ .env.local
63
+ .env.dev
64
+ test_*
65
+ tests/
66
+ scripts/dev/
@@ -0,0 +1,143 @@
1
+ # CodeLens Environment Configuration Example
2
+ # Copy this file to .env and customize for your environment
3
+
4
+ # =============================================================================
5
+ # BASIC CONFIGURATION
6
+ # =============================================================================
7
+
8
+ # Environment (development, production)
9
+ CODELENS_ENV=development
10
+
11
+ # Logging level (debug, info, warning, error)
12
+ CODELENS_LOG_LEVEL=info
13
+
14
+ # Server configuration
15
+ CODELENS_HOST=0.0.0.0
16
+ CODELENS_PORT=8000
17
+ CODELENS_WORKERS=1
18
+
19
+ # Application secret key (generate with: openssl rand -hex 32)
20
+ CODELENS_SECRET_KEY=your-secret-key-here
21
+
22
+ # =============================================================================
23
+ # DATABASE CONFIGURATION
24
+ # =============================================================================
25
+
26
+ # SQLite (Development - Default)
27
+ CODELENS_DATABASE__URL=sqlite+aiosqlite:///./codelens.db
28
+
29
+ # PostgreSQL (Production - Recommended)
30
+ # CODELENS_DATABASE__URL=postgresql+asyncpg://username:password@localhost:5432/codelens
31
+
32
+ # Database connection settings
33
+ CODELENS_DATABASE__ECHO=false
34
+ CODELENS_DATABASE__POOL_SIZE=5
35
+ CODELENS_DATABASE__MAX_OVERFLOW=10
36
+
37
+ # =============================================================================
38
+ # ANALYSIS CONFIGURATION
39
+ # =============================================================================
40
+
41
+ # Code analysis tools
42
+ CODELENS_ANALYZER__RUFF_ENABLED=true
43
+ CODELENS_ANALYZER__MYPY_ENABLED=true
44
+ CODELENS_ANALYZER__CHECK_TYPE_HINTS=true
45
+ CODELENS_ANALYZER__CHECK_DOCSTRINGS=true
46
+
47
+ # Analysis limits
48
+ CODELENS_ANALYZER__MAX_COMPLEXITY=10
49
+ CODELENS_ANALYZER__MAX_LINE_LENGTH=88
50
+ CODELENS_ANALYZER__EXECUTION_TIMEOUT=30
51
+ CODELENS_ANALYZER__MEMORY_LIMIT=128m
52
+ CODELENS_ANALYZER__CPU_LIMIT=0.5
53
+
54
+ # =============================================================================
55
+ # SIMILARITY DETECTION
56
+ # =============================================================================
57
+
58
+ # Enable similarity checking
59
+ CODELENS_SIMILARITY__ENABLED=true
60
+ CODELENS_SIMILARITY__THRESHOLD=0.8
61
+ CODELENS_SIMILARITY__METHODS=["ast_structural", "token_based"]
62
+
63
+ # AI baseline comparison
64
+ CODELENS_SIMILARITY__USE_AI_BASELINES=false
65
+ CODELENS_SIMILARITY__AI_BASELINE_COUNT=5
66
+
67
+ # =============================================================================
68
+ # SECURITY CONFIGURATION
69
+ # =============================================================================
70
+
71
+ # Docker integration (for code execution)
72
+ CODELENS_DOCKER_ENABLED=true
73
+
74
+ # Upload limits (CODELENS_MAX_FILE_SIZE is in bytes; 10485760 = 10MB)
74
+ CODELENS_MAX_FILE_SIZE=10485760
76
+ CODELENS_MAX_FILES_PER_BATCH=100
77
+
78
+ # API rate limiting (CODELENS_RATE_LIMIT_WINDOW is in seconds; 3600 = 1 hour)
79
+ CODELENS_RATE_LIMIT_ENABLED=true
80
+ CODELENS_RATE_LIMIT_REQUESTS=100
81
+ CODELENS_RATE_LIMIT_WINDOW=3600
82
+
83
+ # =============================================================================
84
+ # EXTERNAL SERVICES (Optional)
85
+ # =============================================================================
86
+
87
+ # Redis (for caching and background tasks)
88
+ # CODELENS_REDIS_URL=redis://localhost:6379/0
89
+
90
+ # Email configuration (for notifications)
91
+ # CODELENS_EMAIL__SMTP_HOST=smtp.example.com
92
+ # CODELENS_EMAIL__SMTP_PORT=587
93
+ # CODELENS_EMAIL__USERNAME=your-email@example.com
94
+ # CODELENS_EMAIL__PASSWORD=your-app-password
95
+ # CODELENS_EMAIL__USE_TLS=true
96
+
97
+ # =============================================================================
98
+ # MONITORING AND LOGGING
99
+ # =============================================================================
100
+
101
+ # Structured logging format
102
+ CODELENS_LOG_FORMAT=json
103
+
104
+ # Health check configuration
105
+ CODELENS_HEALTH_CHECK_TIMEOUT=30
106
+
107
+ # Metrics collection
108
+ CODELENS_METRICS_ENABLED=false
109
+ # CODELENS_METRICS_ENDPOINT=/metrics
110
+
111
+ # =============================================================================
112
+ # DEVELOPMENT SETTINGS
113
+ # =============================================================================
114
+
115
+ # Auto-reload on file changes (development only)
116
+ CODELENS_AUTO_RELOAD=false
117
+
118
+ # API documentation (always enabled for educational tool)
119
+ CODELENS_DOCS_ENABLED=true
120
+
121
+ # Debug mode
122
+ CODELENS_DEBUG=false
123
+
124
+ # =============================================================================
125
+ # PRODUCTION OVERRIDES
126
+ # =============================================================================
127
+
128
+ # For production deployment, uncomment and configure:
129
+
130
+ # Database (PostgreSQL recommended for production)
131
+ # CODELENS_DATABASE__URL=postgresql+asyncpg://codelens:secure_password@localhost:5432/codelens
132
+
133
+ # Performance
134
+ # CODELENS_WORKERS=4
135
+ # CODELENS_LOG_LEVEL=warning
136
+
137
+ # Security
138
+ # CODELENS_SECRET_KEY=your-super-secure-secret-key-generated-randomly
139
+ # CODELENS_RATE_LIMIT_ENABLED=true
140
+ # CODELENS_DOCS_ENABLED=true # Keep enabled for educational tool
141
+
142
+ # Caching
143
+ # CODELENS_REDIS_URL=redis://localhost:6379/0
@@ -0,0 +1,223 @@
1
+ # CodeLens - Code Analysis Microservice
2
+
3
+ **Automated Code Analysis & Grading Assistant for Educators**
4
+
5
+ ## 🎯 Vision
6
+
7
+ A dedicated microservice for analyzing, validating, and grading student code submissions across multiple programming languages. Designed to help educators provide consistent, comprehensive feedback while reducing manual grading time. It targets introductory programming courses where a complete development stack (linters, type checkers, etc.) may not have been set up.
8
+
9
+ ## 🚀 Core Purpose
10
+
11
+ Unlike DocumentLens (focused on natural language and academic text), CodeLens specializes in:
12
+ - Static code analysis and quality metrics
13
+ - Syntax validation and error detection
14
+ - Code similarity/plagiarism detection
15
+ - Automated test execution
16
+ - Grading rubric application
17
+ - Constructive feedback generation
18
+
19
+ ## 📊 Target Languages (MVP)
20
+
21
+ 1. **Python** - Full AST analysis, complexity metrics, PEP8 compliance
22
+ 2. **HTML** - W3C validation, accessibility checks, semantic structure
23
+ 3. **CSS** - Validation, best practices, browser compatibility
24
+ 4. **JavaScript** - Syntax validation, ESLint rules, common pitfalls
25
+
26
+ ## 🏗️ Proposed Architecture
27
+
28
+ ```
29
+ codelens/
30
+ ├── app/
31
+ │ ├── analyzers/
32
+ │ │ ├── python_analyzer.py # AST parsing, complexity, style
33
+ │ │ ├── html_analyzer.py # Structure, validation, accessibility
34
+ │ │ ├── css_analyzer.py # Validation, best practices
35
+ │ │ ├── js_analyzer.py # Syntax, linting, patterns
36
+ │ │ └── similarity_checker.py # Cross-submission similarity
37
+ │ │
38
+ │ ├── services/
39
+ │ │ ├── code_executor.py # Safe code execution sandbox
40
+ │ │ ├── test_runner.py # Run unit tests
41
+ │ │ ├── w3c_validator.py # W3C API integration
42
+ │ │ ├── feedback_generator.py # AI-assisted feedback
43
+ │ │ └── rubric_evaluator.py # Apply grading rubrics
44
+ │ │
45
+ │ ├── validators/
46
+ │ │ ├── syntax_validator.py # Language-specific syntax checks
47
+ │ │ ├── security_scanner.py # Security vulnerability detection
48
+ │ │ └── best_practices.py # Coding standards enforcement
49
+ │ │
50
+ │ └── api/
51
+ │ └── routes/
52
+ │ ├── analyze.py # Single file analysis
53
+ │ ├── batch.py # Batch submission processing
54
+ │ ├── rubric.py # Rubric management
55
+ │ └── reports.py # Grade report generation
56
+ ```
57
+
58
+ ## 🔧 Key Features
59
+
60
+ ### 1. Code Quality Analysis
61
+ - **Complexity Metrics**: Cyclomatic complexity, nesting depth, LOC
62
+ - **Style Compliance**: PEP8 (Python), ESLint (JS), W3C (HTML/CSS)
63
+ - **Code Smells**: Duplicate code, long methods, unused variables
64
+ - **Documentation**: Docstring coverage, comment quality
65
+
66
+ ### 2. Correctness Validation
67
+ - **Syntax Checking**: Language-specific parsers
68
+ - **Type Checking**: Static type analysis where applicable
69
+ - **Logic Errors**: Common mistakes and anti-patterns
70
+ - **Output Validation**: Expected vs actual output comparison
71
+
72
+ ### 3. Plagiarism Detection
73
+ - **Structural Similarity**: AST-based comparison
74
+ - **Token Analysis**: Variable renaming detection
75
+ - **Cross-Cohort**: Compare across student submissions
76
+ - **External Sources**: Check against online repositories
77
+
78
+ ### 4. Educational Feedback
79
+ - **Constructive Comments**: Explain what's wrong and why
80
+ - **Improvement Suggestions**: How to fix issues
81
+ - **Learning Resources**: Links to relevant documentation
82
+ - **Progress Tracking**: Performance over time
83
+
84
+ ## 📡 API Design
85
+
86
+ ### Core Endpoints
87
+
88
+ ```
89
+ POST /api/analyze/python
90
+ POST /api/analyze/web # HTML/CSS/JS bundle
91
+ POST /api/analyze/batch # Multiple submissions
92
+
93
+ GET /api/rubrics # Available grading rubrics
94
+ POST /api/rubrics # Create custom rubric
95
+
96
+ POST /api/compare # Similarity checking
97
+ GET /api/reports/{id} # Detailed analysis report
98
+ ```
99
+
100
+ ### Example Request/Response
101
+
102
+ ```jsonc
103
+ // Request
104
+ {
105
+ "code": "def calculate_grade(score):\n return score * 100",
106
+ "language": "python",
107
+ "rubric_id": "intro-python-assignment-1",
108
+ "check_similarity": true,
109
+ "cohort_id": "CS101-2024"
110
+ }
111
+
112
+ // Response
113
+ {
114
+ "analysis": {
115
+ "syntax": {
116
+ "valid": true,
117
+ "errors": []
118
+ },
119
+ "quality": {
120
+ "complexity": 1,
121
+ "style_issues": [
122
+ {
123
+ "line": 1,
124
+ "issue": "Missing function docstring",
125
+ "severity": "minor"
126
+ }
127
+ ]
128
+ },
129
+ "correctness": {
130
+ "test_results": "3/5 passed",
131
+ "logic_issues": ["No input validation"]
132
+ },
133
+ "similarity": {
134
+ "highest_match": 0.15,
135
+ "flagged": false
136
+ }
137
+ },
138
+ "grade": {
139
+ "score": 75,
140
+ "breakdown": {
141
+ "functionality": 30,
142
+ "style": 15,
143
+ "documentation": 10,
144
+ "testing": 20
145
+ }
146
+ },
147
+ "feedback": {
148
+ "strengths": ["Clean function structure"],
149
+ "improvements": ["Add input validation", "Include docstring"],
150
+ "resources": ["https://peps.python.org/pep-0257/"]
151
+ }
152
+ }
153
+ ```
154
+
155
+ ## 🛠️ Technology Stack
156
+
157
+ ### Core Dependencies
158
+ - **Python Analysis** (configurable options): `ast`, `pylint`, `mypy`, `black`, `radon`, `ruff`, `basedpyright`
159
+ - **Web Validation**: `html5lib`, `cssutils`, `beautifulsoup4`
160
+ - **JavaScript**: `esprima` (via subprocess), `jshint`
161
+ - **Similarity**: `difflib`, `python-Levenshtein`, custom AST comparison
162
+ - **Sandboxing**: `docker` or `firejail` for safe execution
163
+
164
+ ### External Services
165
+ - **W3C Validator**: Optional for official HTML/CSS validation
166
+ - **GitHub API**: Check against public repositories
167
+ - **OpenAI API**: Optional for enhanced feedback generation
168
+
169
+ ## 🚦 Implementation Phases
170
+
171
+ ### Phase 1: Python Analysis (Week 1-2)
172
+ - AST-based analysis
173
+ - Style checking (PEP8)
174
+ - Basic complexity metrics
175
+ - Simple test runner
176
+
177
+ ### Phase 2: Web Technologies (Week 3-4)
178
+ - HTML structure validation
179
+ - CSS validation
180
+ - JavaScript syntax checking
181
+ - Basic accessibility checks
182
+
183
+ ### Phase 3: Similarity Detection (Week 5)
184
+ - Token-based comparison
185
+ - AST structural similarity
186
+ - Cohort-wide checking
187
+
188
+ ### Phase 4: Grading & Feedback (Week 6)
189
+ - Rubric system
190
+ - Automated scoring
191
+ - Feedback generation
192
+ - Report creation
193
+
194
+ ## 🔒 Security Considerations
195
+
196
+ 1. **Code Execution Sandbox**: Never execute student code directly
197
+ 2. **Resource Limits**: CPU, memory, and time constraints
198
+ 3. **Input Sanitization**: Prevent injection attacks
199
+ 4. **Access Control**: Educator-only endpoints
200
+ 5. **Data Privacy**: Secure storage of student submissions
201
+
202
+ ## 📈 Future Enhancements
203
+
204
+ - **More Languages**: Java, C++, SQL, R
205
+ - **IDE Integration**: VS Code extension
206
+ - **Real-time Analysis**: Live coding feedback
207
+ - **Peer Review**: Student cross-evaluation
208
+ - **Learning Analytics**: Track common mistakes
209
+ - **AI Tutoring**: Personalized learning paths
210
+
211
+ ## 🎓 Educational Impact
212
+
213
+ CodeLens aims to:
214
+ - Provide consistent, objective grading
215
+ - Reduce educator workload
216
+ - Give students immediate feedback
217
+ - Identify struggling students early
218
+ - Track learning progress over time
219
+ - Encourage best practices from the start
220
+
221
+ ---
222
+
223
+ *CodeLens: Empowering educators with intelligent code analysis*