lumen-py 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lumen_py-1.0.0/PKG-INFO +15 -0
- lumen_py-1.0.0/README.md +66 -0
- lumen_py-1.0.0/deep_mapper.py +138 -0
- lumen_py-1.0.0/llm_handler.py +26 -0
- lumen_py-1.0.0/lumen_py.egg-info/PKG-INFO +15 -0
- lumen_py-1.0.0/lumen_py.egg-info/SOURCES.txt +14 -0
- lumen_py-1.0.0/lumen_py.egg-info/dependency_links.txt +1 -0
- lumen_py-1.0.0/lumen_py.egg-info/entry_points.txt +2 -0
- lumen_py-1.0.0/lumen_py.egg-info/requires.txt +7 -0
- lumen_py-1.0.0/lumen_py.egg-info/top_level.txt +6 -0
- lumen_py-1.0.0/main.py +138 -0
- lumen_py-1.0.0/setup.cfg +4 -0
- lumen_py-1.0.0/setup.py +26 -0
- lumen_py-1.0.0/socratic_engine.py +49 -0
- lumen_py-1.0.0/train_deep_model.py +58 -0
- lumen_py-1.0.0/visualizer.py +38 -0
lumen_py-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lumen-py
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: An AI-powered, local, Object-Oriented coding mentor.
|
|
5
|
+
Author: Bivo
|
|
6
|
+
Requires-Dist: torch
|
|
7
|
+
Requires-Dist: transformers
|
|
8
|
+
Requires-Dist: datasets
|
|
9
|
+
Requires-Dist: typer
|
|
10
|
+
Requires-Dist: rich
|
|
11
|
+
Requires-Dist: pyperclip
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
Dynamic: author
|
|
14
|
+
Dynamic: requires-dist
|
|
15
|
+
Dynamic: summary
|
lumen_py-1.0.0/README.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Lumen-Py 🧠
|
|
2
|
+
|
|
3
|
+
An AI-powered, local, Object-Oriented coding mentor designed to fight "AI Brain Rot."
|
|
4
|
+
|
|
5
|
+
Instead of writing the code for you and making you dependent, Lumen-Py uses a locally fine-tuned PyTorch Deep Learning model to analyze your architectural maturity, and a local Socratic LLM to guide you to the answers yourself.
|
|
6
|
+
|
|
7
|
+
## 🌟 What It Offers (Features)
|
|
8
|
+
* **System-Level Architecture Scanning:** Point Lumen at an entire project directory. It will crawl your files, map your dependencies, and critique your overall system design (e.g., pointing out tightly coupled classes or poor separation of concerns).
|
|
9
|
+
* **Deep Learning Code Classifier:** Uses a locally fine-tuned Hugging Face `CodeBERT` model to mathematically classify your Python scripts as "Junior" or "Senior" architecture.
|
|
10
|
+
* **Socratic Sledgehammer:** Integrates with local LLMs via Ollama to review code and ask targeted, Socratic questions instead of just printing the solution.
|
|
11
|
+
* **Real-Time Token Streaming:** A seamless, matrix-style terminal UI that streams the AI's responses character-by-character, complete with live Markdown rendering.
|
|
12
|
+
* **Mermaid.js Visualizer:** Automatically translates architectural concepts into flowchart code that copies straight to your clipboard.
|
|
13
|
+
|
|
14
|
+
## ⚙️ Installation & Setup
|
|
15
|
+
|
|
16
|
+
1. **Clone the repository:**
|
|
17
|
+
```bash
|
|
18
|
+
git clone https://github.com/Bivo2004/Lumen-Py.git
|
|
19
|
+
cd Lumen-Py
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
2. **Install the dependencies:**
|
|
23
|
+
```bash
|
|
24
|
+
pip install torch transformers datasets typer rich pyperclip
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
3. **Install Ollama:**
|
|
28
|
+
Ensure you have [Ollama](https://ollama.com/) installed and running locally with your preferred model (the default for this tool is `llama3`).
|
|
29
|
+
|
|
30
|
+
## 🧠 Training the Brain (Required First Step)
|
|
31
|
+
To keep the GitHub repository fast and lightweight, the compiled neural network weights (`lumen_brain.pth`) are ignored by Git. **You must train the PyTorch brain locally before running a code review.**
|
|
32
|
+
|
|
33
|
+
Run the training pipeline. This script downloads a dataset, tokenizes it via CodeBERT, and trains the PyTorch model directly on your machine:
|
|
34
|
+
```bash
|
|
35
|
+
python train_deep_model.py
|
|
36
|
+
```
|
|
37
|
+
*Note: This will take a few moments and will generate a local `lumen_brain.pth` file in your directory. Do not delete this file.*
|
|
38
|
+
|
|
39
|
+
## 🚀 How to Use Lumen-Py
|
|
40
|
+
|
|
41
|
+
Lumen-Py is operated entirely through the terminal using its custom CLI.
|
|
42
|
+
|
|
43
|
+
### 1. The Interactive Mentor
|
|
44
|
+
Boot up the terminal chat interface for general Socratic mentoring or to brainstorm architectures.
|
|
45
|
+
```bash
|
|
46
|
+
python main.py start
|
|
47
|
+
```
|
|
48
|
+
*(Pro-tip: Inside the chat, try typing `diagram: how a REST API connects to a database` to see the Mermaid visualizer automatically build a flowchart and copy it to your clipboard!)*
|
|
49
|
+
|
|
50
|
+
### 2. Single-File Socratic Review
|
|
51
|
+
Point Lumen at a specific file on your machine. It will analyze the architectural maturity of that script and initiate a targeted Socratic review.
|
|
52
|
+
```bash
|
|
53
|
+
python main.py review path/to/your/script.py
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### 3. System-Level Architecture Review
|
|
57
|
+
Point Lumen at an entire project folder. It will scan all `.py` files, ignore your virtual environments, and critique how your files and classes interact with one another.
|
|
58
|
+
```bash
|
|
59
|
+
python main.py review path/to/your/project_folder/
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## 🏗️ Architecture & Stack
|
|
63
|
+
* **Language:** Python 3
|
|
64
|
+
* **Deep Learning:** PyTorch, Hugging Face Transformers (`microsoft/codebert-base`), Datasets
|
|
65
|
+
* **CLI & UI:** Typer, Rich
|
|
66
|
+
* **LLM Integration:** Ollama
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
import torch.optim as optim
|
|
4
|
+
from transformers import RobertaTokenizer, RobertaModel
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
class CodeLevelNN(nn.Module):
    """A small feedforward classifier head for architectural maturity.

    Maps a 768-dim CodeBERT [CLS] embedding to two logits
    (index 0 = "Junior", index 1 = "Senior").
    """

    def __init__(self, input_dim: int = 768, hidden_dim: int = 128):
        super().__init__()
        # NOTE: the attribute must stay named `network` — saved checkpoints
        # key their state_dict entries on it.
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, 2),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return (batch, 2) class logits for a batch of embeddings."""
        return self.network(x)
|
|
21
|
+
|
|
22
|
+
class DeepKnowledgeMapper:
    """Handles the PyTorch classifier and the CodeBERT embedding pipeline.

    Combines a pretrained CodeBERT encoder (mostly frozen, last two layers
    trainable) with the small CodeLevelNN head, and persists/restores both
    through a single checkpoint file (``lumen_brain.pth``).
    """

    def __init__(self):
        # Prefer GPU when available; both sub-models live on this device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = CodeLevelNN(input_dim=768).to(self.device)
        self.tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
        self.codebert = RobertaModel.from_pretrained("microsoft/codebert-base").to(self.device)
        # Checkpoint path is relative to the current working directory.
        self.weights_path = "lumen_brain.pth"

        # Freeze the whole encoder first ...
        for param in self.codebert.parameters():
            param.requires_grad = False

        # ... then un-freeze only the last two encoder layers so train()
        # fine-tunes a thin slice of CodeBERT instead of the full stack.
        for param in self.codebert.encoder.layer[-2:].parameters():
            param.requires_grad = True

        if os.path.exists(self.weights_path):
            # NOTE(review): weights_only=False unpickles arbitrary objects —
            # only load checkpoint files from a trusted source.
            checkpoint = torch.load(self.weights_path, map_location=self.device, weights_only=False)
            if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
                # New-style checkpoint: head weights plus fine-tuned CodeBERT.
                self.model.load_state_dict(checkpoint['model_state_dict'])
                self.codebert.load_state_dict(checkpoint['codebert_state_dict'])
            else:
                # Legacy checkpoint: a bare state_dict for the head only
                # (CodeBERT keeps its pretrained weights in that case).
                self.model.load_state_dict(checkpoint)
            self._is_trained = True
        else:
            # predict() refuses to classify until train() has produced weights.
            self._is_trained = False

    def get_embedding(self, code_snippet: str) -> torch.Tensor:
        """Translates raw Python code into a 768-number semantic vector.

        Returns the CodeBERT [CLS] embedding with shape (1, 768). Gradients
        are NOT disabled here; predict() wraps the call in torch.no_grad().
        """
        tokens = self.tokenizer(code_snippet, return_tensors="pt", truncation=True, max_length=512)
        input_ids = tokens['input_ids'].to(self.device)
        attention_mask = tokens['attention_mask'].to(self.device)

        outputs = self.codebert(input_ids=input_ids, attention_mask=attention_mask)

        # The [CLS] token (position 0) summarises the whole snippet.
        return outputs.last_hidden_state[:, 0, :]

    def train(self, code_samples: list[str], labels: list[str], epochs: int = 25, batch_size: int = 32):
        """Trains end-to-end, updating the un-frozen CodeBERT layers and Neural Network.

        Args:
            code_samples: Raw Python source strings.
            labels: Parallel list of "Junior"/"Senior" class names.
            epochs: Full passes over the dataset.
            batch_size: Mini-batch size.

        Side effects: writes both state_dicts to ``self.weights_path`` and
        sets ``self._is_trained`` to True.
        """
        print(f"\nStep 1: Tokenizing {len(code_samples)} scripts...")

        label_map = {"Junior": 0, "Senior": 1}

        # Tokenize everything and keep on CPU to save VRAM
        tokens = self.tokenizer(code_samples, padding=True, truncation=True, max_length=512, return_tensors="pt")
        X_input_ids = tokens['input_ids']
        X_attention_mask = tokens['attention_mask']
        Y = torch.tensor([label_map[l] for l in labels], dtype=torch.long)

        print("\nStep 2: Training the Neural Network alongside CodeBERT...")

        # Optimize the classifier head plus only the un-frozen encoder params.
        trainable_params = list(self.model.parameters()) + \
            list(filter(lambda p: p.requires_grad, self.codebert.parameters()))

        # We drop the learning rate (e.g., 1e-4) to make sure we don't destroy CodeBERT's weights
        optimizer = optim.Adam(trainable_params, lr=1e-4)
        criterion = nn.CrossEntropyLoss()

        self.model.train()
        self.codebert.train()

        dataset_size = len(code_samples)

        for epoch in range(epochs):
            # Fresh shuffle of sample order each epoch.
            permutation = torch.randperm(dataset_size)
            epoch_loss = 0
            steps = 0

            # Process in batches
            for i in range(0, dataset_size, batch_size):
                indices = permutation[i:i+batch_size]

                # Move batches to device during loop execution
                batch_input_ids = X_input_ids[indices].to(self.device)
                batch_attention_mask = X_attention_mask[indices].to(self.device)
                batch_y = Y[indices].to(self.device)

                optimizer.zero_grad()

                # Forward pass through CodeBERT then MLP
                outputs = self.codebert(input_ids=batch_input_ids, attention_mask=batch_attention_mask)
                cls_tokens = outputs.last_hidden_state[:, 0, :]
                logits = self.model(cls_tokens)

                loss = criterion(logits, batch_y)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()
                steps += 1

            # Log on the first epoch and every fifth one thereafter.
            if (epoch + 1) % 5 == 0 or epoch == 0:
                print(f"Epoch {epoch+1:02d}/{epochs} | Avg Loss: {epoch_loss/steps:.4f}")

        # Save both state_dicts
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'codebert_state_dict': self.codebert.state_dict(),
        }, self.weights_path)
        self._is_trained = True
        print(f"\n✅ Neural Network and CodeBERT weights saved to {self.weights_path}!")

    def predict(self, code_snippet: str) -> str:
        """Predicts the level of a new piece of code.

        Returns "Senior" or "Junior", or "Unknown (Model untrained)" when no
        checkpoint was found and train() has not run yet.
        """
        if not self._is_trained:
            return "Unknown (Model untrained)"

        self.model.eval()
        self.codebert.eval()
        with torch.no_grad():
            embedding = self.get_embedding(code_snippet)
            output = self.model(embedding)
            prediction_idx = torch.argmax(output, dim=1).item()

        return "Senior" if prediction_idx == 1 else "Junior"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
class LocalAIHandler:
    """Thin HTTP client for a locally running Ollama server.

    Talks to the ``/api/generate`` endpoint on localhost:11434.
    """

    def __init__(self, model_name: str = "llama3"):
        self.model_name = model_name
        self.url = "http://localhost:11434/api/generate"

    def generate_response(self, prompt: str, system_prompt=None) -> str:
        """Blocking call: waits for the full response.

        Args:
            prompt: The user prompt to send.
            system_prompt: Optional system prompt, forwarded as Ollama's
                "system" field. Bug fix: SocraticEngine.ask_tutor and
                MermaidVisualizer.generate_class_diagram both call this
                method with system_prompt=..., which the previous signature
                rejected with a TypeError.

        Returns:
            The model's complete response text, or "" when absent.
        """
        payload = {"model": self.model_name, "prompt": prompt, "stream": False}
        if system_prompt is not None:
            payload["system"] = system_prompt
        # Generation can be slow on CPU, but a generous timeout still stops
        # the CLI from hanging forever on a dead server.
        response = requests.post(self.url, json=payload, timeout=600)
        return response.json().get("response", "")

    def generate_stream(self, prompt: str):
        """Streams the response token by token (generator of str chunks).

        Yields each chunk's "response" text as Ollama emits it; on any
        failure it yields a single bracketed error message instead of
        raising, so the caller's live UI can render the problem inline.
        """
        payload = {"model": self.model_name, "prompt": prompt, "stream": True}
        try:
            with requests.post(self.url, json=payload, stream=True, timeout=600) as response:
                response.raise_for_status()
                # Ollama streams one JSON object per line.
                for line in response.iter_lines():
                    if line:
                        data = json.loads(line.decode('utf-8'))
                        yield data.get("response", "")
        except Exception as e:
            # Deliberate best-effort: surface the failure in-band.
            yield f"\n[Error communicating with local AI: {e}]"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lumen-py
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: An AI-powered, local, Object-Oriented coding mentor.
|
|
5
|
+
Author: Bivo
|
|
6
|
+
Requires-Dist: torch
|
|
7
|
+
Requires-Dist: transformers
|
|
8
|
+
Requires-Dist: datasets
|
|
9
|
+
Requires-Dist: typer
|
|
10
|
+
Requires-Dist: rich
|
|
11
|
+
Requires-Dist: pyperclip
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
Dynamic: author
|
|
14
|
+
Dynamic: requires-dist
|
|
15
|
+
Dynamic: summary
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
deep_mapper.py
|
|
3
|
+
llm_handler.py
|
|
4
|
+
main.py
|
|
5
|
+
setup.py
|
|
6
|
+
socratic_engine.py
|
|
7
|
+
train_deep_model.py
|
|
8
|
+
visualizer.py
|
|
9
|
+
lumen_py.egg-info/PKG-INFO
|
|
10
|
+
lumen_py.egg-info/SOURCES.txt
|
|
11
|
+
lumen_py.egg-info/dependency_links.txt
|
|
12
|
+
lumen_py.egg-info/entry_points.txt
|
|
13
|
+
lumen_py.egg-info/requires.txt
|
|
14
|
+
lumen_py.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
lumen_py-1.0.0/main.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
import pyperclip
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
from rich.markdown import Markdown
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from rich.live import Live
|
|
8
|
+
from llm_handler import LocalAIHandler
|
|
9
|
+
from socratic_engine import SocraticEngine, SocraticStrategy
|
|
10
|
+
from visualizer import MermaidVisualizer
|
|
11
|
+
from deep_mapper import DeepKnowledgeMapper # <-- Bring in the brain!
|
|
12
|
+
|
|
13
|
+
app = typer.Typer()
|
|
14
|
+
console = Console()
|
|
15
|
+
|
|
16
|
+
@app.command()
def start(model: str = typer.Option("llama3", help="The Ollama model to use.")):
    """
    Start the Lumen-Py AI Mentor terminal session.

    Runs a REPL: plain input is streamed through the Socratic engine;
    input starting with "diagram" is routed to the Mermaid visualizer.
    """
    console.print(Panel.fit("[bold green]Welcome to Lumen-Py[/bold green]\nYour Anti-Dependency AI Mentor.", border_style="green"))
    console.print("[dim]Type 'exit' to quit, or 'diagram: <concept>' to generate an architecture map.[/dim]\n")

    with console.status("[bold cyan]Booting neural engines...", spinner="dots"):
        handler = LocalAIHandler(model_name=model)
        strategy = SocraticStrategy()
        engine = SocraticEngine(ai_handler=handler, strategy=strategy)
        visualizer = MermaidVisualizer(ai_handler=handler)

    while True:
        try:
            user_input = console.input("\n[bold blue]You:[/bold blue] ")

            if user_input.lower() in ['exit', 'quit']:
                console.print("[bold green]Lumen:[/bold green] Keep coding cleanly. Goodbye!")
                break

            # Bug fix: only treat the input as a diagram request when it
            # STARTS with "diagram" (as advertised above) instead of firing
            # whenever the word appears anywhere in a normal question. The
            # concept also keeps its original casing and no stray ":" is
            # left at the front.
            if user_input.lower().startswith("diagram"):
                if ":" in user_input:
                    concept = user_input.split(":", 1)[1].strip()
                else:
                    concept = user_input[len("diagram"):].strip()
                with console.status("[bold magenta]Drawing on the whiteboard...", spinner="bouncingBar"):
                    mermaid_code = visualizer.generate_class_diagram(concept)

                pyperclip.copy(mermaid_code)
                console.print("\n[bold magenta]Lumen (Diagram):[/bold magenta]")
                console.print(Panel(mermaid_code, title="Mermaid.js", border_style="magenta"))
                console.print("[bold green]✅ Raw code automatically copied to your clipboard![/bold green]")
                console.print("[dim]Just go to https://mermaid.live and press Ctrl+V / Cmd+V[/dim]")
                continue

            console.print("\n[bold green]Lumen:[/bold green]")
            full_response = ""
            # The Live display updates the Markdown rendering in real-time
            with Live(Markdown(""), console=console, refresh_per_second=15) as live:
                for chunk in engine.ask_tutor_stream(user_input):
                    full_response += chunk
                    live.update(Markdown(full_response))

        except (KeyboardInterrupt, EOFError):
            # Ctrl+C or Ctrl+D both end the session gracefully.
            console.print("\n[bold green]Lumen:[/bold green] Session terminated. Goodbye!")
            break
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@app.command()
def review(target_path: Path, model: str = typer.Option("llama3", help="The Ollama model to use.")):
    """
    Review a single Python file OR an entire project directory.

    Loads the target source, classifies its architectural maturity with the
    local PyTorch model, then streams one Socratic review question.
    """
    if not target_path.exists():
        console.print(f"[bold red]Error:[/bold red] Could not find {target_path}")
        # Bug fix: exit non-zero so shells/CI can detect the failure
        # (typer.Exit() defaults to exit code 0, signalling success).
        raise typer.Exit(code=1)

    # --- THE SYSTEM SCANNER ---
    code_content = ""
    file_count = 0

    with console.status("[bold cyan]Scanning file system...", spinner="dots"):
        if target_path.is_file():
            # Single file logic. errors="ignore" matches the directory branch
            # so one undecodable byte cannot crash the review.
            with open(target_path, "r", encoding="utf-8", errors="ignore") as f:
                code_content = f"--- FILE: {target_path.name} ---\n{f.read()}\n\n"
            file_count = 1
            console.print(Panel.fit(f"[bold green]Lumen Code Reviewer[/bold green]\nAnalyzing File: {target_path.name}", border_style="green"))

        elif target_path.is_dir():
            # Directory logic: Crawl the folder
            console.print(Panel.fit(f"[bold green]Lumen System Architect[/bold green]\nAnalyzing Project: {target_path.name}/", border_style="green"))

            # Define folders to ignore
            ignore_dirs = {".venv", "venv", "env", "__pycache__", ".git", "build", "dist"}

            for filepath in target_path.rglob("*.py"):
                # Skip ignored directories
                if any(ignored in filepath.parts for ignored in ignore_dirs):
                    continue

                with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                    code_content += f"--- FILE: {filepath.name} ---\n{f.read()}\n\n"
                file_count += 1

    if file_count == 0:
        console.print("[bold red]Error:[/bold red] No valid Python files found.")
        raise typer.Exit(code=1)

    console.print(f"[dim]Successfully loaded {file_count} file(s) into memory.[/dim]\n")

    # --- THE REVIEW PIPELINE ---
    with console.status("[bold cyan]Booting neural engines and loading CodeBERT...", spinner="dots"):
        handler = LocalAIHandler(model_name=model)
        mapper = DeepKnowledgeMapper()

    with console.status("[bold yellow]Analyzing system architecture...", spinner="dots"):
        # Only the first 2000 characters are classified (CodeBERT truncates
        # to 512 tokens anyway).
        level = mapper.predict(code_content[:2000])

    console.print(f"[bold blue]Overall Architectural Maturity:[/bold blue] {level} Level\n")

    strategy = SocraticStrategy()
    engine = SocraticEngine(ai_handler=handler, strategy=strategy)

    hidden_prompt = (
        f"I have provided the code for a Python project below. Your ML model classified the overall architecture "
        f"as {level}-level. Review it as a strict Socratic software architect. Do NOT rewrite "
        f"the code. Look at how the files interact. Ask me exactly ONE targeted question about my project's "
        f"design patterns, separation of concerns, or scalability.\n\n"
        f"PROJECT CODE:\n{code_content}"
    )

    console.print("\n[bold green]Lumen:[/bold green]")
    full_response = ""

    # The Live display updates the Markdown rendering in real-time
    with Live(Markdown(""), console=console, refresh_per_second=15) as live:
        for chunk in engine.ask_tutor_stream(hidden_prompt):
            full_response += chunk
            live.update(Markdown(full_response))
|
|
137
|
+
# Entry point when executed directly; setup.py also exposes this Typer app
# as the "lumen" console script (entry_points: "lumen=main:app").
if __name__ == "__main__":
    app()
|
lumen_py-1.0.0/setup.cfg
ADDED
lumen_py-1.0.0/setup.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from setuptools import setup
|
|
2
|
+
|
|
3
|
+
# Packaging metadata for the lumen-py distribution (flat layout: each
# module listed individually rather than a package directory).
setup(
    name="lumen-py",
    version="1.0.0",
    description="An AI-powered, local, Object-Oriented coding mentor.",
    author="Bivo",
    # We list the individual files that make up your tool
    py_modules=["main", "deep_mapper", "llm_handler", "socratic_engine", "visualizer", "train_deep_model"],
    # These are the libraries pip will automatically install for the user
    install_requires=[
        "torch",
        "transformers",
        "datasets",
        "typer",
        "rich",
        "pyperclip",
        "requests"
    ],
    # This creates the global terminal command "lumen"
    # (invokes the Typer app object defined in main.py).
    entry_points={
        "console_scripts": [
            "lumen=main:app",
        ],
    },
)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from llm_handler import LocalAIHandler
|
|
3
|
+
|
|
4
|
+
# --- 1. The Strategy Interface ---
|
|
5
|
+
class TutorStrategy(ABC):
    """Interface for pluggable tutoring styles (Strategy pattern).

    Concrete strategies supply the system prompt that shapes how the
    LLM responds to the learner.
    """

    @abstractmethod
    def get_system_prompt(self) -> str:
        """Return the system prompt that defines this tutoring style."""
|
|
11
|
+
|
|
12
|
+
# --- 2. Concrete Strategy ---
|
|
13
|
+
class SocraticStrategy(TutorStrategy):
    """A strict strategy that refuses to write code and only asks guiding questions."""

    # Full persona/instruction text handed to the LLM as its system prompt.
    _PROMPT = (
        "You are Lumen, an expert Python Architect and strict Socratic tutor. "
        "YOUR PRIME DIRECTIVE: NEVER write code for the user. EVER. "
        "If the user asks for code, refuse politely. Instead, identify the core "
        "concept they are struggling with and ask ONE guiding question to help them "
        "figure it out themselves. Keep your responses short, analytical, and encouraging."
    )

    def get_system_prompt(self) -> str:
        """Return the Socratic system prompt."""
        return self._PROMPT
|
|
24
|
+
|
|
25
|
+
# --- 3. The Context (Engine) ---
|
|
26
|
+
class SocraticEngine:
    """
    Context object of the Strategy pattern: delegates prompt construction
    to a TutorStrategy and request transport to a LocalAIHandler.
    """

    def __init__(self, ai_handler: LocalAIHandler, strategy: TutorStrategy):
        self.ai_handler = ai_handler
        self.strategy = strategy

    def set_strategy(self, strategy: TutorStrategy):
        """Swap the tutoring style at runtime."""
        self.strategy = strategy

    def ask_tutor(self, user_input: str) -> str:
        """Run one blocking question/answer round through the current strategy."""
        persona = self.strategy.get_system_prompt()
        # NOTE(review): this passes system_prompt= to generate_response —
        # verify LocalAIHandler.generate_response accepts that keyword.
        return self.ai_handler.generate_response(
            prompt=user_input,
            system_prompt=persona,
        )

    def ask_tutor_stream(self, prompt: str):
        """Passes the system prompt and yields the streamed response."""
        framed = f"{self.strategy.get_system_prompt()}\n\nUser: {prompt}\nLumen:"
        return self.ai_handler.generate_stream(framed)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from datasets import load_dataset
|
|
2
|
+
from deep_mapper import DeepKnowledgeMapper
|
|
3
|
+
|
|
4
|
+
def heuristic_labeler(code_str: str) -> str:
    """Label raw training data as "Senior" or "Junior".

    Any marker of structured/advanced Python (a class definition, a return
    annotation arrow, a generator yield, or an '@' as in decorators) counts
    as "Senior"; everything else is "Junior".
    """
    senior_markers = ('class ', '->', 'yield ', '@')
    return "Senior" if any(marker in code_str for marker in senior_markers) else "Junior"
|
|
9
|
+
|
|
10
|
+
def run_deep_training():
    """Download and balance a labelled dataset, then fine-tune the model.

    Pipeline: fetch 5000 samples, heuristically label them, keep an equal
    number per class, train the DeepKnowledgeMapper, and run a quick
    two-snippet sanity check on the result.
    """
    print("Downloading dataset from Hugging Face...")
    # Pull a larger chunk so we have enough data to mine for "Senior" examples
    dataset = load_dataset("flytech/python-codes-25k", split="train[:5000]")

    X_train = []
    y_train = []

    max_per_class = 250  # We want exactly 250 of each (500 total)
    counts = {"Junior": 0, "Senior": 0}

    print("Mining and balancing data...")
    for item in dataset:
        snippet = item['output']
        tier = heuristic_labeler(snippet)

        # Keep the sample only while its class is still under the cap.
        if counts[tier] < max_per_class:
            X_train.append(snippet)
            y_train.append(tier)
            counts[tier] += 1

        # Stop searching once we have a perfectly balanced dataset
        if counts["Junior"] == max_per_class and counts["Senior"] == max_per_class:
            break

    print(f"Dataset Balanced: {counts['Junior']} Junior | {counts['Senior']} Senior")

    mapper = DeepKnowledgeMapper()

    print("\nStarting Deep Learning Pipeline...")

    mapper.train(X_train, y_train, epochs=20)

    print("\n--- Live Inference Test ---")
    junior_script = "x = 10\ny = 20\nprint(x + y)"
    senior_script = "class Server:\n def __init__(self):\n self.active = True"

    print(f"Junior Test -> Classified as: {mapper.predict(junior_script)}")
    print(f"Senior Test -> Classified as: {mapper.predict(senior_script)}")
    print("---------------------------")
|
|
56
|
+
|
|
57
|
+
# Run the full training pipeline when executed as a script.
if __name__ == "__main__":
    run_deep_training()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from llm_handler import LocalAIHandler
|
|
2
|
+
|
|
3
|
+
class MermaidVisualizer:
    """
    Turns architectural concepts or Python code into Mermaid.js diagram text
    by delegating to the local LLM under a strict, syntax-only system prompt.
    """

    def __init__(self, ai_handler: LocalAIHandler):
        self.ai_handler = ai_handler
        self._system_prompt = (
            "You are an expert Software Architect. Your ONLY job is to generate "
            "valid Mermaid.js diagram code. "
            "DO NOT include any conversational text, explanations, or Markdown formatting. "
            "Return strictly the raw Mermaid syntax.\n\n"
            "CRITICAL SYNTAX RULES:\n"
            "1. For Concepts/Flows: Use `graph TD`. Define nodes: `A[\"Name\"]` and links: `A -->|label| B`.\n"
            "2. For OOP Classes: Use `classDiagram`. Define classes: `class Car { +start() }` and links: `Car --> Engine : contains`.\n"
            "3. NEVER mix flowchart syntax (`graph`) with OOP syntax (`classDiagram`)."
        )

    def generate_class_diagram(self, concept_or_code: str) -> str:
        """
        Ask the LLM for the most fitting Mermaid diagram of *concept_or_code*
        and return the raw Mermaid source with stray ``` fences stripped.
        """
        request = (
            f"Generate the most appropriate Mermaid diagram (either `graph TD` for concepts or `classDiagram` for strict code) "
            f"for the following:\n\n{concept_or_code}"
        )

        # NOTE(review): this passes system_prompt= to generate_response —
        # confirm LocalAIHandler.generate_response accepts that keyword.
        response_text = self.ai_handler.generate_response(
            prompt=request,
            system_prompt=self._system_prompt
        )

        # Safety filter: remove accidental markdown code fences.
        return response_text.replace("```mermaid", "").replace("```", "").strip()
|