algomath-extract 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +260 -0
- package/bin/algo-extract.js +143 -0
- package/bin/algo-generate.js +102 -0
- package/bin/algo-help.js +136 -0
- package/bin/algo-list.js +56 -0
- package/bin/algo-run.js +141 -0
- package/bin/algo-status.js +88 -0
- package/bin/algo-verify.js +189 -0
- package/bin/install.js +349 -0
- package/package.json +57 -0
- package/requirements.txt +20 -0
- package/src/__pycache__/intent.cpython-313.pyc +0 -0
- package/src/cli/__pycache__/commands.cpython-313.pyc +0 -0
- package/src/cli/cli_entry.py +106 -0
- package/src/cli/commands.py +339 -0
- package/src/execution/__init__.py +74 -0
- package/src/execution/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/display.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/executor.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/sandbox.cpython-313.pyc +0 -0
- package/src/execution/display.py +261 -0
- package/src/execution/errors.py +158 -0
- package/src/execution/executor.py +253 -0
- package/src/execution/sandbox.py +333 -0
- package/src/extraction/__init__.py +102 -0
- package/src/extraction/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/boundaries.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/llm_extraction.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/notation.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/parser.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/pdf_processor.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/prompts.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/review.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/schema.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/validation.cpython-313.pyc +0 -0
- package/src/extraction/boundaries.py +281 -0
- package/src/extraction/errors.py +156 -0
- package/src/extraction/llm_extraction.py +225 -0
- package/src/extraction/notation.py +240 -0
- package/src/extraction/parser.py +402 -0
- package/src/extraction/pdf_processor.py +281 -0
- package/src/extraction/prompts.py +90 -0
- package/src/extraction/review.py +298 -0
- package/src/extraction/schema.py +173 -0
- package/src/extraction/validation.py +202 -0
- package/src/generation/__init__.py +79 -0
- package/src/generation/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/code_generator.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/hybrid.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/llm_generator.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/persistence.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/prompts.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/review.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/templates.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/types.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/validation.cpython-313.pyc +0 -0
- package/src/generation/code_generator.py +375 -0
- package/src/generation/errors.py +84 -0
- package/src/generation/hybrid.py +210 -0
- package/src/generation/llm_generator.py +223 -0
- package/src/generation/persistence.py +221 -0
- package/src/generation/prompts.py +202 -0
- package/src/generation/review.py +254 -0
- package/src/generation/templates.py +208 -0
- package/src/generation/types.py +196 -0
- package/src/generation/validation.py +278 -0
- package/src/intent.py +323 -0
- package/src/verification/__init__.py +63 -0
- package/src/verification/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/checker.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/comparison.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/explainer.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/static_analysis.cpython-313.pyc +0 -0
- package/src/verification/checker.py +220 -0
- package/src/verification/comparison.py +492 -0
- package/src/verification/explainer.py +414 -0
- package/src/verification/static_analysis.py +540 -0
- package/src/workflows/__init__.py +21 -0
- package/src/workflows/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/extract.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/generate.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/run.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/verify.cpython-313.pyc +0 -0
- package/src/workflows/extract.py +181 -0
- package/src/workflows/generate.py +155 -0
- package/src/workflows/run.py +187 -0
- package/src/workflows/verify.py +334 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""LLM-based code generator.
|
|
2
|
+
|
|
3
|
+
This module provides the LLMCodeGenerator class for generating
|
|
4
|
+
Python code using LLM assistance for complex expressions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import ast
|
|
8
|
+
import re
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
from src.extraction.schema import Algorithm
|
|
12
|
+
from src.generation.code_generator import GeneratedCode
|
|
13
|
+
from src.generation.errors import LLMGenerationError
|
|
14
|
+
from src.generation.prompts import (
|
|
15
|
+
format_code_generation_prompt,
|
|
16
|
+
format_complex_expression_prompt,
|
|
17
|
+
)
|
|
18
|
+
from src.generation.types import TypeInferrer, ValidationResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LLMCodeGenerator:
    """
    Generate code using LLM for complex constructs.

    ``_call_llm`` is currently a stub that always returns ``None``. Until a
    real client is wired in, ``generate`` therefore raises
    ``LLMGenerationError`` and ``generate_with_fallback`` returns a stub
    function that raises ``NotImplementedError``.
    """

    def __init__(self,
                 model: str = "claude-3-5-haiku",
                 temperature: float = 0.2,
                 timeout: int = 30):
        """
        Initialize the LLM code generator.

        Args:
            model: LLM model to use
            temperature: Sampling temperature
            timeout: Request timeout in seconds
        """
        self.model = model
        self.temperature = temperature
        self.timeout = timeout
        self.type_inferrer = TypeInferrer()

    def generate(self, algorithm: Algorithm) -> GeneratedCode:
        """
        Generate code for an algorithm via the LLM.

        The response is syntax-checked with ``ast``; a syntax error does not
        raise, it is reported through the returned ``validation_result``.

        Args:
            algorithm: Algorithm to generate code for

        Returns:
            GeneratedCode with source and metadata

        Raises:
            LLMGenerationError: If the LLM returns nothing or any step fails
        """
        try:
            messages = format_code_generation_prompt(algorithm)
            code = self._call_llm(messages)

            if not code:
                raise LLMGenerationError("LLM returned empty response")

            validation = self._validate_syntax(code)

            return GeneratedCode(
                source=code,
                algorithm_name=algorithm.name,
                imports=[],
                validation_result=validation
            )
        except LLMGenerationError:
            # Already the right type: re-raise untouched so the message is
            # not wrapped a second time by the generic handler below.
            raise
        except Exception as e:
            # Chain the cause so the original traceback stays available.
            raise LLMGenerationError(f"LLM generation failed: {e}") from e

    def generate_docstring(self, algorithm: Algorithm) -> str:
        """
        Generate comprehensive docstring via LLM.

        Best effort: any failure (including an empty LLM response) yields the
        locally built fallback docstring instead of raising.

        Args:
            algorithm: Algorithm to generate docstring for

        Returns:
            Formatted docstring
        """
        try:
            from src.generation.prompts import format_docstring_generation_prompt
            messages = format_docstring_generation_prompt(algorithm)
            docstring = self._call_llm(messages)
            return docstring or self._fallback_docstring(algorithm)
        except Exception:
            # Deliberate best-effort: a docstring is never worth failing for.
            return self._fallback_docstring(algorithm)

    def generate_complex_expression(self,
                                    expression: str,
                                    context: Optional[Dict[str, Any]] = None) -> str:
        """
        Generate Python code for complex mathematical expression.

        Args:
            expression: Mathematical expression text
            context: Variable context (available vars, types)

        Returns:
            Python code string, or ``expression`` unchanged when the LLM
            returns nothing
        """
        messages = format_complex_expression_prompt(expression, context)
        code = self._call_llm(messages)
        return code or expression

    def _call_llm(self, messages: List[Dict[str, str]]) -> Optional[str]:
        """
        Call LLM API with messages.

        This is a stub - actual implementation would use
        Anthropic, OpenAI, or other LLM API.

        Args:
            messages: List of message dicts

        Returns:
            LLM response text or None
        """
        # TODO: Implement actual LLM API call
        # For now, return None to trigger fallback
        return None

    def _validate_syntax(self, code: str) -> ValidationResult:
        """
        Validate Python syntax by parsing ``code`` with ``ast.parse``.

        Args:
            code: Python source to check

        Returns:
            ValidationResult that is valid with no errors, or invalid with a
            single error dict (message, line, type, text)
        """
        try:
            ast.parse(code)
            return ValidationResult(is_valid=True, errors=[], warnings=[])
        except SyntaxError as e:
            return ValidationResult(
                is_valid=False,
                errors=[{
                    'message': f"Syntax error: {e.msg}",
                    'line': e.lineno,
                    'type': 'syntax',
                    'text': e.text
                }],
                warnings=[]
            )

    def _fallback_docstring(self, algorithm: Algorithm) -> str:
        """
        Generate basic docstring when LLM fails.

        Args:
            algorithm: Algorithm whose description, inputs and outputs are
                rendered

        Returns:
            Plain-text docstring body (no surrounding quotes)
        """
        lines = []
        lines.append(algorithm.description or f"{algorithm.name} implementation.")
        lines.append("")

        if algorithm.inputs:
            lines.append("Args:")
            for inp in algorithm.inputs:
                name = inp.get('name', '')
                desc = inp.get('description', '')
                ptype = self.type_inferrer.infer_variable_type(name, inp)
                lines.append(f" {name} ({ptype}): {desc}")
            lines.append("")

        if algorithm.outputs:
            lines.append("Returns:")
            for out in algorithm.outputs:
                name = out.get('name', '')
                desc = out.get('description', '')
                lines.append(f" {name}: {desc}")

        return '\n'.join(lines)

    @staticmethod
    def _render_stub_source(name: str,
                            params: str,
                            return_type: str,
                            step_descriptions: List[str]) -> str:
        """
        Render the source text of a ``NotImplementedError`` stub function.

        Args:
            name: Function name
            params: Already formatted parameter list
            return_type: Return annotation text
            step_descriptions: Step descriptions listed in the stub docstring

        Returns:
            Stub source ending in a newline
        """
        # A literal triple quote inside a description would terminate the
        # stub docstring early; escaping keeps the text but not the delimiter.
        steps_text = str(list(step_descriptions)).replace('"""', '\\"\\"\\"')
        return (
            f'def {name}({params}) -> {return_type}:\n'
            f'    """Stub implementation - LLM generation failed.\n'
            f'\n'
            f'    Steps: {steps_text}\n'
            f'    """\n'
            f'    raise NotImplementedError("Algorithm not yet implemented")\n'
        )

    def _generate_stub(self, algorithm: Algorithm) -> GeneratedCode:
        """
        Generate stub code when LLM fails.

        Args:
            algorithm: Algorithm to generate stub for

        Returns:
            GeneratedCode with stub implementation; its validation result
            comes from actually parsing the stub
        """
        sig = self.type_inferrer.infer_function_signature(algorithm)

        stub = self._render_stub_source(
            sig.name,
            sig.format_params(),
            sig.return_type,
            [s.description for s in algorithm.steps],
        )

        return GeneratedCode(
            source=stub,
            algorithm_name=algorithm.name,
            imports=[],
            # Validate rather than assume: an inferred name or type that is
            # not valid Python must not be reported as valid code.
            validation_result=self._validate_syntax(stub)
        )

    def generate_with_fallback(self, algorithm: Algorithm) -> GeneratedCode:
        """
        Generate code with graceful fallback.

        Args:
            algorithm: Algorithm to generate code for

        Returns:
            GeneratedCode (may be stub on failure)
        """
        try:
            return self.generate(algorithm)
        except LLMGenerationError:
            return self._generate_stub(algorithm)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
__all__ = ["LLMCodeGenerator"]
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Code persistence utilities.
|
|
2
|
+
|
|
3
|
+
This module handles saving generated code to the filesystem
|
|
4
|
+
with metadata.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, Optional
|
|
12
|
+
|
|
13
|
+
from src.extraction.schema import Algorithm
|
|
14
|
+
from src.generation.review import ReviewState
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CodePersistence:
    """
    Persist generated code to filesystem.

    Saves code and metadata to {base_path}/algorithms/{name}/ as
    ``generated.py`` and ``metadata.json``. Both files are always read and
    written as UTF-8 so a round trip does not depend on the platform locale.
    """

    def __init__(self, base_path: str = ".algomath"):
        """
        Initialize persistence manager.

        Args:
            base_path: Base directory for persistence
        """
        self.base_path = Path(base_path)
        self.algorithms_path = self.base_path / "algorithms"

    def save_generated_code(self,
                            algorithm: Algorithm,
                            code: str,
                            review_state: Optional[ReviewState] = None) -> str:
        """
        Save generated code to {base_path}/algorithms/{name}/generated.py.

        A metadata docstring header is prepended to the code, and
        ``metadata.json`` is written next to it.

        Args:
            algorithm: Algorithm metadata
            code: Python code to save
            review_state: Optional review state

        Returns:
            Path to saved file
        """
        algo_dir = self.algorithms_path / self._sanitize_name(algorithm.name)
        algo_dir.mkdir(parents=True, exist_ok=True)

        generated_file = algo_dir / "generated.py"

        header = self._generate_header(algorithm, review_state)
        full_code = header + "\n\n" + code

        generated_file.write_text(full_code, encoding='utf-8')

        self._save_metadata(algo_dir, algorithm, review_state)

        return str(generated_file)

    @staticmethod
    def _escape_for_docstring(text: Any) -> str:
        """
        Make free text safe to embed in a triple-double-quoted docstring.

        Backslashes are doubled so sequences such as ``\\u`` or ``\\x`` in
        math-flavoured descriptions are not read as string escapes, and every
        run of three or more double quotes is escaped so it can neither close
        the docstring nor confuse the header stripper in
        ``load_generated_code``.

        Args:
            text: Value to embed; converted with ``str``

        Returns:
            Escaped text
        """
        escaped = str(text).replace('\\', '\\\\')
        return re.sub(r'"{3,}', lambda m: '\\"' * len(m.group()), escaped)

    def _generate_header(self,
                         algorithm: Algorithm,
                         review_state: Optional[ReviewState]) -> str:
        """
        Generate file header with metadata.

        Args:
            algorithm: Algorithm metadata
            review_state: Optional review state

        Returns:
            Header docstring text, delimiters included
        """
        lines = ['"""']
        lines.append(f"Generated code for: {self._escape_for_docstring(algorithm.name)}")
        lines.append(f"Description: {self._escape_for_docstring(algorithm.description)}")
        lines.append(f"Generated at: {datetime.now().isoformat()}")

        if review_state:
            lines.append(f"Status: {'Approved' if review_state.is_approved else 'Pending review'}")
            lines.append(f"Edited: {'Yes' if review_state.is_edited else 'No'}")

        lines.append('"""')
        return '\n'.join(lines)

    def _save_metadata(self,
                       algo_dir: Path,
                       algorithm: Algorithm,
                       review_state: Optional[ReviewState]):
        """
        Save metadata JSON alongside code.

        Args:
            algo_dir: Algorithm directory
            algorithm: Algorithm metadata
            review_state: Optional review state
        """
        metadata: Dict[str, Any] = {
            'name': algorithm.name,
            'description': algorithm.description,
            'inputs': algorithm.inputs,
            'outputs': algorithm.outputs,
            'step_count': len(algorithm.steps),
            'saved_at': datetime.now().isoformat(),
        }

        if review_state:
            metadata['review'] = {
                'is_edited': review_state.is_edited,
                'is_approved': review_state.is_approved,
                'approved_at': review_state.approved_at.isoformat() if review_state.approved_at else None
            }

        metadata_file = algo_dir / "metadata.json"
        metadata_file.write_text(json.dumps(metadata, indent=2), encoding='utf-8')

    def load_generated_code(self, algorithm_name: str) -> Optional[str]:
        """
        Load previously generated code.

        When the file starts with a docstring header, the header is removed
        and the remainder is returned with surrounding whitespace stripped;
        otherwise the file content is returned as is.

        Args:
            algorithm_name: Algorithm name

        Returns:
            Code string or None if not found
        """
        algo_dir = self.algorithms_path / self._sanitize_name(algorithm_name)
        generated_file = algo_dir / "generated.py"

        try:
            # Must match the encoding used by save_generated_code.
            content = generated_file.read_text(encoding='utf-8')
        except FileNotFoundError:
            return None

        if content.startswith('"""'):
            end = content.find('"""', 3)
            if end != -1:
                return content[end + 3:].strip()
        return content

    def load_metadata(self, algorithm_name: str) -> Optional[Dict[str, Any]]:
        """
        Load metadata for an algorithm.

        Args:
            algorithm_name: Algorithm name

        Returns:
            Metadata dict or None if not found
        """
        algo_dir = self.algorithms_path / self._sanitize_name(algorithm_name)
        metadata_file = algo_dir / "metadata.json"

        try:
            return json.loads(metadata_file.read_text(encoding='utf-8'))
        except FileNotFoundError:
            return None

    def _sanitize_name(self, name: str) -> str:
        """
        Convert algorithm name to filesystem-safe directory name.

        Args:
            name: Algorithm name

        Returns:
            Lower-case name with special characters removed and runs of
            spaces/dashes collapsed to ``_``; ``"unnamed"`` when nothing
            usable is left
        """
        # Remove special characters
        safe = re.sub(r'[^\w\s-]', '', name)
        # Replace spaces and dashes with underscores
        safe = re.sub(r'[-\s]+', '_', safe)
        # An empty result would make the algorithm directory collapse into
        # the shared algorithms/ root, so fall back to a fixed folder name.
        return safe.lower().strip('_') or "unnamed"
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def save_to_context(context: Any,
                    algorithm: Algorithm,
                    review: 'CodeReviewInterface') -> str:
    """
    Persist reviewed code to disk and record it on the context.

    Args:
        context: Object exposing ``save_code`` and, optionally,
            ``save_code_path``
        algorithm: Algorithm metadata
        review: Review interface; must report approval

    Returns:
        Path to saved file

    Raises:
        ValueError: If code not approved
    """
    store = CodePersistence()

    # Guard clause: nothing is written unless the review was approved.
    approved = review.is_approved()
    if not approved:
        raise ValueError("Code must be approved before saving")

    approved_code = review.get_modified_code()
    saved_path = store.save_generated_code(algorithm, approved_code, review.state)

    # Mirror the result on the context; recording the path is optional.
    context.save_code(approved_code)
    if hasattr(context, 'save_code_path'):
        context.save_code_path(saved_path)

    return saved_path
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
__all__ = [
|
|
219
|
+
'CodePersistence',
|
|
220
|
+
'save_to_context',
|
|
221
|
+
]
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""LLM prompts for code generation.
|
|
2
|
+
|
|
3
|
+
This module provides prompts for generating Python code from
|
|
4
|
+
structured algorithms using LLM assistance.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List
|
|
8
|
+
|
|
9
|
+
from src.extraction.schema import Algorithm
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# System prompts
|
|
13
|
+
CODE_GENERATION_SYSTEM_PROMPT = """You are a Python code generator for mathematical algorithms.
|
|
14
|
+
|
|
15
|
+
Your task: Generate executable Python code from structured algorithm steps.
|
|
16
|
+
|
|
17
|
+
Rules:
|
|
18
|
+
1. Use type hints for all parameters and return values
|
|
19
|
+
2. Include Google-style docstrings with Args, Returns sections
|
|
20
|
+
3. Use numpy (np) for matrix/vector operations
|
|
21
|
+
4. Use math module for mathematical functions (sqrt, sin, cos, etc.)
|
|
22
|
+
5. Follow PEP 8 naming conventions (snake_case)
|
|
23
|
+
6. Generate clean, well-formatted code
|
|
24
|
+
7. Handle edge cases gracefully
|
|
25
|
+
|
|
26
|
+
Output format: Return ONLY valid Python code, no markdown fences, no explanations.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
CODE_GENERATION_USER_TEMPLATE = """Generate Python code for this algorithm:
|
|
31
|
+
|
|
32
|
+
Algorithm Name: {name}
|
|
33
|
+
Description: {description}
|
|
34
|
+
|
|
35
|
+
Inputs:
|
|
36
|
+
{inputs}
|
|
37
|
+
|
|
38
|
+
Outputs:
|
|
39
|
+
{outputs}
|
|
40
|
+
|
|
41
|
+
Steps:
|
|
42
|
+
{steps}
|
|
43
|
+
|
|
44
|
+
Requirements:
|
|
45
|
+
- Include type hints
|
|
46
|
+
- Include comprehensive docstring
|
|
47
|
+
- Use numpy for arrays/matrices
|
|
48
|
+
- Generate helper functions if needed
|
|
49
|
+
- Handle all edge cases
|
|
50
|
+
|
|
51
|
+
Generate only the Python function code."""
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
COMPLEX_EXPRESSION_SYSTEM_PROMPT = """You are a Python expression generator.
|
|
55
|
+
|
|
56
|
+
Convert mathematical expressions into valid Python code.
|
|
57
|
+
|
|
58
|
+
Examples:
|
|
59
|
+
- "sum of A[i] for i from 1 to n" -> "sum(A[i] for i in range(n))"
|
|
60
|
+
- "minimum distance from u to v" -> "min(dist[u], dist[v])"
|
|
61
|
+
- "argmin of distances" -> "np.argmin(distances)"
|
|
62
|
+
- "set of visited nodes" -> "visited = set()"
|
|
63
|
+
|
|
64
|
+
Use standard Python operators and functions."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
DOCSTRING_GENERATION_PROMPT = """Generate a Google-style docstring for this algorithm.
|
|
68
|
+
|
|
69
|
+
Include:
|
|
70
|
+
- One-line summary
|
|
71
|
+
- Detailed description (from algorithm description)
|
|
72
|
+
- Args section with types and descriptions
|
|
73
|
+
- Returns section with type and description
|
|
74
|
+
- Raises section (if applicable)
|
|
75
|
+
- Complexity notation (if inferable: O(n), O(n²), etc.)
|
|
76
|
+
- Step references (links to step IDs)
|
|
77
|
+
|
|
78
|
+
Example format:
|
|
79
|
+
Triple-quote
|
|
80
|
+
{one_line_summary}
|
|
81
|
+
|
|
82
|
+
{detailed_description}
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
{param}: {type} - {description}
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
{type}: {description}
|
|
89
|
+
|
|
90
|
+
Raises:
|
|
91
|
+
ValueError: When {condition}
|
|
92
|
+
|
|
93
|
+
Time Complexity: {complexity}
|
|
94
|
+
Space Complexity: {complexity}
|
|
95
|
+
Triple-quote
|
|
96
|
+
"""
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def format_inputs(inputs: List[Dict[str, Any]]) -> str:
    """
    Render algorithm inputs as a bullet list for a prompt.

    Args:
        inputs: Input descriptors; ``name`` and ``description`` keys are
            read, defaulting to ``'unnamed'`` and ``''``

    Returns:
        One bullet line per input, or a "(none)" marker when there are none
    """
    if inputs:
        return '\n'.join(
            f" - {entry.get('name', 'unnamed')}: {entry.get('description', '')}"
            for entry in inputs
        )
    return " (none)"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def format_outputs(outputs: List[Dict[str, Any]]) -> str:
    """
    Render algorithm outputs as a bullet list for a prompt.

    Args:
        outputs: Output descriptors; ``name`` and ``description`` keys are
            read, defaulting to ``'unnamed'`` and ``''``

    Returns:
        One bullet line per output, or a "(none)" marker when there are none
    """
    if outputs:
        return '\n'.join(
            f" - {entry.get('name', 'unnamed')}: {entry.get('description', '')}"
            for entry in outputs
        )
    return " (none)"
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def format_steps(steps: List[Any]) -> str:
    """
    Render algorithm steps as a numbered list for a prompt.

    Args:
        steps: Step objects exposing ``description`` and, optionally, a
            ``type`` whose ``value`` is used as the bracketed label

    Returns:
        One numbered line per step (starting at 1), or a "(none)" marker
        when there are none
    """
    if not steps:
        return " (none)"

    rendered = []
    number = 0
    for step in steps:
        number += 1
        # Steps without a ``type`` attribute get the generic label.
        if hasattr(step, 'type'):
            label = step.type.value
        else:
            label = 'step'
        rendered.append(f" {number}. [{label}] {step.description}")
    return '\n'.join(rendered)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def format_code_generation_prompt(algorithm: Algorithm) -> List[Dict[str, str]]:
    """
    Build the chat messages asking the LLM to implement an algorithm.

    Args:
        algorithm: Algorithm to generate code for

    Returns:
        Two message dicts: the system prompt, then the user template filled
        with the algorithm's name, description, inputs, outputs and steps
    """
    system_message = {"role": "system", "content": CODE_GENERATION_SYSTEM_PROMPT}

    user_content = CODE_GENERATION_USER_TEMPLATE.format(
        name=algorithm.name,
        description=algorithm.description,
        inputs=format_inputs(algorithm.inputs),
        outputs=format_outputs(algorithm.outputs),
        steps=format_steps(algorithm.steps)
    )
    user_message = {"role": "user", "content": user_content}

    return [system_message, user_message]
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def format_complex_expression_prompt(expression: str, context: Dict[str, Any] = None) -> List[Dict[str, str]]:
    """
    Build the chat messages asking the LLM to convert one expression.

    Args:
        expression: Mathematical expression to convert
        context: Optional mapping of variable names to whatever should be
            shown next to them; omitted from the prompt when empty or None

    Returns:
        Two message dicts: the system prompt, then the conversion request
    """
    if context:
        listing = '\n'.join(f" - {key}: {value}" for key, value in context.items())
        suffix = f"\nAvailable variables:\n{listing}"
    else:
        suffix = ""

    request = f"Convert to Python: {expression}{suffix}"
    return [
        {"role": "system", "content": COMPLEX_EXPRESSION_SYSTEM_PROMPT},
        {"role": "user", "content": request}
    ]
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def format_docstring_generation_prompt(algorithm: Algorithm) -> List[Dict[str, str]]:
    """
    Build the chat messages asking the LLM for a docstring.

    Args:
        algorithm: Algorithm to generate docstring for

    Returns:
        Two message dicts: the docstring system prompt, then a request
        carrying the algorithm's name and description
    """
    request = f"Generate docstring for algorithm: {algorithm.name}\n\n{algorithm.description}"
    system_message = {"role": "system", "content": DOCSTRING_GENERATION_PROMPT}
    user_message = {"role": "user", "content": request}
    return [system_message, user_message]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# Exports
|
|
194
|
+
__all__ = [
|
|
195
|
+
"CODE_GENERATION_SYSTEM_PROMPT",
|
|
196
|
+
"CODE_GENERATION_USER_TEMPLATE",
|
|
197
|
+
"COMPLEX_EXPRESSION_SYSTEM_PROMPT",
|
|
198
|
+
"DOCSTRING_GENERATION_PROMPT",
|
|
199
|
+
"format_code_generation_prompt",
|
|
200
|
+
"format_complex_expression_prompt",
|
|
201
|
+
"format_docstring_generation_prompt",
|
|
202
|
+
]
|