algomath-extract 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +260 -0
- package/bin/algo-extract.js +143 -0
- package/bin/algo-generate.js +102 -0
- package/bin/algo-help.js +136 -0
- package/bin/algo-list.js +56 -0
- package/bin/algo-run.js +141 -0
- package/bin/algo-status.js +88 -0
- package/bin/algo-verify.js +189 -0
- package/bin/install.js +349 -0
- package/package.json +57 -0
- package/requirements.txt +20 -0
- package/src/__pycache__/intent.cpython-313.pyc +0 -0
- package/src/cli/__pycache__/commands.cpython-313.pyc +0 -0
- package/src/cli/cli_entry.py +106 -0
- package/src/cli/commands.py +339 -0
- package/src/execution/__init__.py +74 -0
- package/src/execution/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/display.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/executor.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/sandbox.cpython-313.pyc +0 -0
- package/src/execution/display.py +261 -0
- package/src/execution/errors.py +158 -0
- package/src/execution/executor.py +253 -0
- package/src/execution/sandbox.py +333 -0
- package/src/extraction/__init__.py +102 -0
- package/src/extraction/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/boundaries.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/llm_extraction.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/notation.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/parser.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/pdf_processor.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/prompts.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/review.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/schema.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/validation.cpython-313.pyc +0 -0
- package/src/extraction/boundaries.py +281 -0
- package/src/extraction/errors.py +156 -0
- package/src/extraction/llm_extraction.py +225 -0
- package/src/extraction/notation.py +240 -0
- package/src/extraction/parser.py +402 -0
- package/src/extraction/pdf_processor.py +281 -0
- package/src/extraction/prompts.py +90 -0
- package/src/extraction/review.py +298 -0
- package/src/extraction/schema.py +173 -0
- package/src/extraction/validation.py +202 -0
- package/src/generation/__init__.py +79 -0
- package/src/generation/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/code_generator.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/hybrid.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/llm_generator.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/persistence.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/prompts.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/review.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/templates.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/types.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/validation.cpython-313.pyc +0 -0
- package/src/generation/code_generator.py +375 -0
- package/src/generation/errors.py +84 -0
- package/src/generation/hybrid.py +210 -0
- package/src/generation/llm_generator.py +223 -0
- package/src/generation/persistence.py +221 -0
- package/src/generation/prompts.py +202 -0
- package/src/generation/review.py +254 -0
- package/src/generation/templates.py +208 -0
- package/src/generation/types.py +196 -0
- package/src/generation/validation.py +278 -0
- package/src/intent.py +323 -0
- package/src/verification/__init__.py +63 -0
- package/src/verification/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/checker.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/comparison.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/explainer.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/static_analysis.cpython-313.pyc +0 -0
- package/src/verification/checker.py +220 -0
- package/src/verification/comparison.py +492 -0
- package/src/verification/explainer.py +414 -0
- package/src/verification/static_analysis.py +540 -0
- package/src/workflows/__init__.py +21 -0
- package/src/workflows/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/extract.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/generate.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/run.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/verify.cpython-313.pyc +0 -0
- package/src/workflows/extract.py +181 -0
- package/src/workflows/generate.py +155 -0
- package/src/workflows/run.py +187 -0
- package/src/workflows/verify.py +334 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
"""Code review interface.
|
|
2
|
+
|
|
3
|
+
This module provides the CodeReviewInterface for reviewing
|
|
4
|
+
and editing generated Python code.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
from src.extraction.schema import Algorithm
|
|
12
|
+
from src.generation.code_generator import GeneratedCode
|
|
13
|
+
from src.generation.validation import CodeValidator, ValidationResult
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class ReviewState:
    """
    Mutable record describing where a code review currently stands.

    Attributes:
        original_code: The code as it was first generated
        current_code: The code as it is now (may differ after edits)
        is_edited: Flag recording that the code has been edited
        is_approved: Flag recording that the code has been approved
        validation_result: Most recent validation result, if any
        approved_at: Timestamp of the approval, if any
    """
    # Required fields: both code snapshots must be supplied by the caller.
    original_code: str
    current_code: str
    # Optional fields: a fresh review starts unedited, unapproved and unvalidated.
    is_edited: bool = field(default=False)
    is_approved: bool = field(default=False)
    validation_result: Optional[ValidationResult] = field(default=None)
    approved_at: Optional[datetime] = field(default=None)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CodeReviewInterface:
    """
    Review and edit generated Python code.

    Provides side-by-side view of steps and code,
    editing capability, and approval workflow.
    """

    # Character widths of the two columns rendered by display().
    STEP_COLUMN_WIDTH = 30
    CODE_COLUMN_WIDTH = 40

    def __init__(self, validator: "Optional[CodeValidator]" = None):
        """
        Initialize review interface.

        Args:
            validator: Optional custom validator; a default CodeValidator
                is instantiated when none (or a falsy value) is given
        """
        self.validator = validator or CodeValidator()
        self.state: Optional[ReviewState] = None
        self.algorithm: Optional[Algorithm] = None
        self.generated: Optional[GeneratedCode] = None

    def load(self, algorithm: "Algorithm", generated: "GeneratedCode"):
        """
        Load algorithm and generated code for review.

        Replaces any previous review state with a fresh, unedited and
        unapproved one whose original and current code are both
        ``generated.source``.

        Args:
            algorithm: Algorithm that was generated from
            generated: Generated code
        """
        self.algorithm = algorithm
        self.generated = generated
        self.state = ReviewState(
            original_code=generated.source,
            current_code=generated.source,
            is_edited=False,
            is_approved=False
        )

    def display(self) -> str:
        """
        Display side-by-side view of steps and code.

        Every table row is truncated and padded to the class-level column
        widths, and the frame is derived from those same widths, so all
        lines of the table have identical length.

        Returns:
            Formatted display string
        """
        if not self.state:
            return "No code loaded for review."

        step_width = self.STEP_COLUMN_WIDTH
        code_width = self.CODE_COLUMN_WIDTH
        # Inner width: both columns, the " │ " separator (3 chars) and one
        # space of padding next to each outer border (2 chars).
        rule = "═" * (step_width + code_width + 5)

        def row(left: str, right: str) -> str:
            # Truncate first, then pad, so each cell is exactly column-wide.
            left = left[:step_width].ljust(step_width)
            right = right[:code_width].ljust(code_width)
            return f"║ {left} │ {right} ║"

        step_lines = self._format_steps().splitlines()
        # Tabs would occupy one character in the width arithmetic but several
        # columns on screen, so expand them before measuring.
        code_lines = [
            line.expandtabs(4)
            for line in self.state.current_code.splitlines()
        ]

        lines = [
            f"╔{rule}╗",
            row("Algorithm Steps", "Generated Code"),
            f"╠{rule}╣",
        ]
        for i in range(max(len(step_lines), len(code_lines))):
            step_part = step_lines[i] if i < len(step_lines) else ""
            code_part = code_lines[i] if i < len(code_lines) else ""
            lines.append(row(step_part, code_part))
        lines.append(f"╚{rule}╝")

        # Status line
        status = []
        if self.state.is_edited:
            status.append("[EDITED]")
        if self.state.is_approved:
            status.append("[APPROVED]")
        result = self.state.validation_result
        if result and not result.is_valid:
            status.append(f"[{result.error_count} ERRORS]")

        lines.append(f"Status: {' '.join(status) if status else 'Ready for review'}")

        return '\n'.join(lines)

    def _format_steps(self) -> str:
        """
        Format algorithm steps for display.

        Each step becomes one line of the form ``"<n>. <description>"``.
        Whitespace runs in the description (including newlines) are
        collapsed to single spaces so a step always occupies one row, and
        the description is shortened with a trailing ``...`` so that the
        whole line, numbering prefix included, fits the step column.

        Returns:
            Newline-joined step lines, or a placeholder message when no
            algorithm is loaded
        """
        if not self.algorithm:
            return "No algorithm loaded"

        lines = []
        for i, step in enumerate(self.algorithm.steps, 1):
            prefix = f"{i}. "
            # Space left for the description once the prefix is accounted for.
            room = self.STEP_COLUMN_WIDTH - len(prefix)
            desc = " ".join(step.description.split())
            if len(desc) > room:
                desc = desc[:max(room - 3, 0)] + "..."
            lines.append(prefix + desc)
        return '\n'.join(lines)

    def edit_code(self, new_code: str) -> "ValidationResult":
        """
        Update code with user edits.

        Stores the new code, marks the review as edited, clears any
        previous approval and validates the new code.

        Args:
            new_code: New code from user edits

        Returns:
            ValidationResult for the edited code

        Raises:
            ValueError: If no code has been loaded
        """
        if not self.state:
            raise ValueError("No code loaded")

        self.state.current_code = new_code
        self.state.is_edited = True
        self.state.is_approved = False  # Reset approval on edit

        # Validate edited code
        self.state.validation_result = self.validator.validate(new_code)

        return self.state.validation_result

    def validate_edit(self) -> "ValidationResult":
        """
        Re-validate current code and store the result on the state.

        Returns:
            ValidationResult

        Raises:
            ValueError: If no code has been loaded
        """
        if not self.state:
            raise ValueError("No code loaded")

        self.state.validation_result = self.validator.validate(
            self.state.current_code
        )
        return self.state.validation_result

    def approve(self) -> bool:
        """
        Mark code as approved.

        Validates the current code first if no validation result is stored.
        Approval is refused when the stored result reports invalid code.

        Returns:
            True if approved successfully, False if the code is invalid

        Raises:
            ValueError: If no code has been loaded
        """
        if not self.state:
            raise ValueError("No code loaded")

        # Must validate first
        if not self.state.validation_result:
            self.validate_edit()

        result = self.state.validation_result
        if result and not result.is_valid:
            return False  # Cannot approve invalid code

        self.state.is_approved = True
        self.state.approved_at = datetime.now()
        return True

    def is_approved(self) -> bool:
        """Check if code is approved (False when nothing is loaded)."""
        return self.state.is_approved if self.state else False

    def get_modified_code(self) -> str:
        """Return the current code, or "" when nothing is loaded."""
        return self.state.current_code if self.state else ""

    def get_original_code(self) -> str:
        """Return original generated code, or "" when nothing is loaded."""
        return self.state.original_code if self.state else ""

    def has_changes(self) -> bool:
        """Check whether the current code text differs from the original."""
        if not self.state:
            return False
        return self.state.current_code != self.state.original_code

    def get_review_summary(self) -> Dict[str, Any]:
        """
        Return summary of review state.

        Returns:
            Empty dict when nothing is loaded; otherwise a dict with the
            keys ``is_edited``, ``is_approved``, ``has_validation_errors``,
            ``validation_errors`` and ``line_count``
        """
        if not self.state:
            return {}

        result = self.state.validation_result
        return {
            'is_edited': self.state.is_edited,
            'is_approved': self.state.is_approved,
            # Without a validation result there is nothing to report.
            'has_validation_errors': (not result.is_valid) if result else False,
            'validation_errors': result.errors if result else [],
            'line_count': len(self.state.current_code.split('\n')),
        }
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def create_review(algorithm: Algorithm,
                  generated: GeneratedCode) -> CodeReviewInterface:
    """
    Build a CodeReviewInterface that already has its inputs loaded.

    A default-constructed interface is created and ``load`` is called on
    it with the two arguments.

    Args:
        algorithm: Algorithm the code was generated from
        generated: The generated code to review

    Returns:
        The loaded CodeReviewInterface
    """
    interface = CodeReviewInterface()
    interface.load(algorithm, generated)
    return interface
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# Public names of this module (what ``import *`` exposes).
__all__ = ['CodeReviewInterface', 'ReviewState', 'create_review']
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Code templates for algorithm generation.
|
|
2
|
+
|
|
3
|
+
This module provides templates for generating Python code
|
|
4
|
+
from structured algorithm steps.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import Dict, Optional
|
|
9
|
+
|
|
10
|
+
from src.extraction.schema import Step, StepType
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CodeTemplates:
    """
    Format-string snippets for common mathematical operations.

    Each attribute is a ``str.format`` template; the names in braces are
    the fields a caller must supply.
    """

    # --- Mathematical operators -------------------------------------
    summation = 'sum({expression} for {iter_var} in {iter_range})'
    product = 'math.prod([{expression} for {iter_var} in {iter_range}])'
    sqrt = 'math.sqrt({expression})'
    abs = 'abs({expression})'

    # --- Set operations ---------------------------------------------
    element_in = '{element} in {set_name}'
    subset = '{set_a} <= {set_b}'

    # --- Matrix operations ------------------------------------------
    matrix_mult = '{matrix_a} @ {matrix_b}'
    matrix_transpose = '{matrix}.T'

    # --- Optimization -----------------------------------------------
    argmin = 'np.argmin({array})'
    argmax = 'np.argmax({array})'
    minimum = 'min({expression})'
    maximum = 'max({expression})'
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TemplateRegistry:
    """
    Registry mapping StepType to code templates.

    Provides templates for generating Python code from
    structured algorithm steps.
    """

    DEFAULT_TEMPLATES: Dict[StepType, str] = {
        StepType.ASSIGNMENT: "{target} = {expression}",
        StepType.LOOP_FOR: "for {iter_var} in {iter_range}:\n{body}",
        StepType.LOOP_WHILE: "while {condition}:\n{body}",
        StepType.CONDITIONAL: "if {condition}:\n{if_body}{else_clause}",
        StepType.RETURN: "return {expression}",
        StepType.CALL: "{call_target}({arguments})",
        StepType.COMMENT: "# {annotation}",
    }

    # Indentation of a suite relative to the header of its compound statement.
    _SUITE_INDENT = 4

    # (compiled pattern, replacement) pairs applied in order by
    # format_math_expression(). Compiled once instead of on every call.
    _MATH_PATTERNS = (
        (re.compile(r'sum\s+of\s+(.+?)\s+for\s+(\w+)\s+in\s+(.+)', re.IGNORECASE),
         r'sum(\1 for \2 in \3)'),
        (re.compile(r'product\s+of\s+(.+?)\s+for\s+(\w+)\s+in\s+(.+)', re.IGNORECASE),
         r'math.prod([\1 for \2 in \3])'),
        # The lookbehind skips names that merely end in "sqrt" (isqrt) and
        # calls that are already qualified (math.sqrt, np.sqrt); without it
        # "math.sqrt(x)" would be rewritten to "math.math.sqrt(x)".
        (re.compile(r'(?<![\w.])sqrt\s*\((.+?)\)', re.IGNORECASE),
         r'math.sqrt(\1)'),
        (re.compile(r'\|(.+?)\|', re.IGNORECASE), r'abs(\1)'),
    )

    def __init__(self):
        """Initialize template registry with a copy of the default templates."""
        self._templates = self.DEFAULT_TEMPLATES.copy()

    def register(self, step_type: StepType, template: str) -> None:
        """
        Register or override a template for a StepType.

        Only this instance is affected; DEFAULT_TEMPLATES is not modified.

        Args:
            step_type: Type of step
            template: Template string with placeholders
        """
        self._templates[step_type] = template

    def get(self, step_type: StepType) -> str:
        """
        Get template for a StepType.

        Args:
            step_type: Type of step

        Returns:
            Template string; a ``# TODO: {description}`` template when no
            template is registered for the type
        """
        return self._templates.get(step_type, "# TODO: {description}")

    def format_step(self, step: Step, indent: int = 4) -> str:
        """
        Format a step using its template.

        Compound statements (for / while / if) are returned with their
        header indented by ``indent`` spaces and their suite one level
        (4 spaces) deeper than the header. Simple statements (assignment,
        return, call, comment) and the fallback are returned without any
        leading indentation.

        Args:
            step: Step to format
            indent: Number of spaces placed before a compound statement

        Returns:
            Formatted code string
        """
        template = self.get(step.type)

        if step.type == StepType.ASSIGNMENT:
            target = step.outputs[0] if step.outputs else "result"
            expression = step.expression or "None"
            return template.format(target=target, expression=expression)

        elif step.type == StepType.LOOP_FOR:
            # The suite is indented relative to the header here; the whole
            # block is shifted by `indent` afterwards. Indenting the suite
            # by `indent + 4` first would apply `indent` twice.
            code = template.format(
                iter_var=step.iter_var or "i",
                iter_range=step.iter_range or "range(n)",
                body=self._format_body(step.body, self._SUITE_INDENT),
            )
            return self.indent_lines(code, indent)

        elif step.type == StepType.LOOP_WHILE:
            code = template.format(
                condition=step.condition or "True",
                body=self._format_body(step.body, self._SUITE_INDENT),
            )
            return self.indent_lines(code, indent)

        elif step.type == StepType.CONDITIONAL:
            if step.else_body:
                else_body = self._format_body(step.else_body, self._SUITE_INDENT)
                else_clause = f"\nelse:\n{else_body}"
            else:
                else_clause = ""

            code = template.format(
                condition=step.condition or "True",
                if_body=self._format_body(step.body, self._SUITE_INDENT),
                else_clause=else_clause,
            )
            return self.indent_lines(code, indent)

        elif step.type == StepType.RETURN:
            expression = step.expression or "None"
            return template.format(expression=expression)

        elif step.type == StepType.CALL:
            call_target = step.call_target or "function"
            arguments = ", ".join(step.arguments) if step.arguments else ""
            return template.format(call_target=call_target, arguments=arguments)

        elif step.type == StepType.COMMENT:
            annotation = step.annotation or step.description
            return template.format(annotation=annotation)

        # Unknown step type: the fallback template takes only the description.
        return template.format(description=step.description)

    def _format_body(self, body_step_ids: list, indent: int) -> str:
        """
        Format body steps (placeholder - actual implementation needs step lookup).

        The step IDs are not resolved here because this class has no access
        to the step map. The result is nevertheless always a valid suite:
        ``pass`` for an empty body, and a marker comment followed by
        ``pass`` for a non-empty one (a suite containing only a comment is
        a syntax error in Python).

        Args:
            body_step_ids: List of step IDs in body
            indent: Indentation level

        Returns:
            Formatted body code
        """
        if not body_step_ids:
            return self.indent_lines("pass", indent)
        return self.indent_lines("# body steps would go here\npass", indent)

    def indent_lines(self, code: str, level: int) -> str:
        """
        Indent multi-line code.

        Args:
            code: Code string
            level: Indentation level (spaces)

        Returns:
            Indented code; "" when ``code`` is empty
        """
        if not code:
            return ""

        indent = ' ' * level
        return '\n'.join(indent + line for line in code.split('\n'))

    def format_math_expression(self, expression: str) -> str:
        """
        Format mathematical expression into Python code.

        Applies, in order and case-insensitively, the rewrites
        ``sum of E for V in R`` -> ``sum(E for V in R)``,
        ``product of E for V in R`` -> ``math.prod([E for V in R])``,
        bare ``sqrt(E)`` -> ``math.sqrt(E)`` and ``|E|`` -> ``abs(E)``.
        Text that matches none of them is returned unchanged.

        Args:
            expression: Mathematical expression text

        Returns:
            Python code string
        """
        result = expression
        for pattern, replacement in self._MATH_PATTERNS:
            result = pattern.sub(replacement, result)
        return result
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Type inference utilities for code generation.
|
|
2
|
+
|
|
3
|
+
This module provides type inference for mathematical variables
|
|
4
|
+
and function signatures based on variable naming conventions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
from src.extraction.schema import Algorithm, Step
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class ValidationResult:
    """
    Outcome of a validation pass.

    Attributes:
        is_valid: True when validation passed
        errors: Error dictionaries collected during validation
        warnings: Warning messages collected during validation
    """
    is_valid: bool
    errors: List[Dict[str, Any]]
    warnings: List[str]

    @property
    def error_count(self) -> int:
        """Number of entries in ``errors``."""
        return len(self.errors)

    @property
    def has_errors(self) -> bool:
        """True when ``errors`` contains at least one entry."""
        return self.error_count > 0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class PythonType:
    """A Python type annotation: a type name plus an optional-ness flag."""
    name: str
    is_optional: bool = False

    def __str__(self) -> str:
        """Render as ``name``, wrapped in ``Optional[...]`` when flagged optional."""
        return f"Optional[{self.name}]" if self.is_optional else self.name
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class FunctionSignature:
    """A function's name, its annotated parameters and its return type."""
    name: str
    params: List[Tuple[str, str]]  # (param_name, type_annotation)
    return_type: str

    def format_params(self) -> str:
        """Render the parameters as ``name: type`` pairs joined by ``", "``."""
        if self.params:
            rendered = [f"{param}: {annotation}" for param, annotation in self.params]
            return ", ".join(rendered)
        # No parameters at all -> empty parameter list.
        return ""
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class TypeInferrer:
    """
    Infer Python types from variable names and context.

    Uses naming conventions common in mathematical algorithms
    to infer appropriate Python type annotations.
    """

    # Variable name patterns → types. Types are tried in the order listed
    # and the first matching pattern wins.
    TYPE_PATTERNS = {
        # Integer types
        "int": [
            r'^n$', r'^m$', r'^i$', r'^j$', r'^k$',
            r'count', r'index', r'idx', r'size', r'length',
            r'num', r'number', r'total', r'sum',
        ],
        # Float types
        "float": [
            r'epsilon', r'delta', r'tolerance', r'threshold',
            r'error', r'precision', r'accuracy',
        ],
        # numpy ndarray types
        "np.ndarray": [
            r'matrix', r'^A$', r'^B$', r'^C$', r'^M$',
            r'grid', r'vector', r'^vec', r'^arr',
            r'tensor', r'^T$',
        ],
        # List types
        "List": [
            r'items', r'nodes', r'edges', r'vertices',
            r'elements', r'values', r'keys', r'points',
            r'neighbors', r'adj', r'list',
        ],
        # Dict types
        "Dict": [
            r'dict', r'map', r'hash', r'table',
            r'graph', r'cache', r'memo',
        ],
        # Boolean types. The is_/has_ prefixes are anchored: unanchored they
        # also match inside names such as "this_step" or "analysis_window".
        "bool": [
            r'visited', r'found', r'^is_', r'^has_',
            r'valid', r'enabled', r'done',
        ],
    }

    # Single-output names that are always given the List[float] return type.
    _SEQUENCE_OUTPUT_NAMES = ("result", "path", "distances", "output", "answer")

    def infer_variable_type(self, name: str, context: Dict[str, Any]) -> str:
        """
        Infer Python type from variable name.

        Args:
            name: Variable name
            context: Additional context (e.g., step information); not
                consulted by the current implementation

        Returns:
            Python type annotation string; "Any" when no pattern matches
            or when ``name`` is not a non-empty string
        """
        # A missing name (None) cannot match any pattern; treat it like an
        # unknown variable instead of failing on .lower().
        if not isinstance(name, str) or not name:
            return "Any"

        name_lower = name.lower()

        # Check each type pattern
        for ptype, patterns in self.TYPE_PATTERNS.items():
            for pattern in patterns:
                try:
                    if re.search(pattern, name_lower, re.IGNORECASE):
                        return ptype
                except re.error:
                    # Skip invalid regex patterns
                    continue

        # Default to Any for unknown types
        return "Any"

    def infer_function_signature(self, algorithm: "Algorithm") -> "FunctionSignature":
        """
        Infer complete function signature from algorithm.

        Parameters come from ``algorithm.inputs`` and the return type from
        ``algorithm.outputs``: no outputs gives "None", one output gives a
        single type, several outputs give a ``Tuple[...]``. The function
        name is the snake_case form of ``algorithm.name``.

        Args:
            algorithm: Algorithm to infer signature for

        Returns:
            FunctionSignature with parameters and return type
        """
        # Infer parameter types from inputs
        params = []
        for inp in algorithm.inputs:
            name = inp.get("name") or ""
            params.append((name, self.infer_variable_type(name, inp)))

        # Infer return type from outputs
        outputs = algorithm.outputs
        if not outputs:
            return_type = "None"
        elif len(outputs) == 1:
            name = outputs[0].get("name") or ""
            # Special handling for common output names
            if name.lower() in self._SEQUENCE_OUTPUT_NAMES:
                return_type = "List[float]"
            else:
                return_type = self.infer_variable_type(name, outputs[0])
        else:
            # Multiple outputs → Tuple
            types = [
                self.infer_variable_type(out.get("name") or "", out)
                for out in outputs
            ]
            return_type = f"Tuple[{', '.join(types)}]"

        return FunctionSignature(
            name=self._to_snake_case(algorithm.name),
            params=params,
            return_type=return_type
        )

    def format_type_hint(self, type_str: str, optional: bool = False) -> str:
        """
        Format type hint for PEP 484 compliance.

        Args:
            type_str: Type string
            optional: Whether type is optional

        Returns:
            ``type_str`` wrapped in ``Optional[...]`` when ``optional`` is
            set and it is not already wrapped; otherwise ``type_str``
        """
        if optional and not type_str.startswith("Optional["):
            return f"Optional[{type_str}]"
        return type_str

    def _to_snake_case(self, name: str) -> str:
        """
        Convert algorithm name to a snake_case Python identifier.

        The result is used as a function name, so every run of characters
        that cannot appear in an identifier (spaces, dashes, punctuation)
        becomes a single underscore, and a result that would start with a
        digit is prefixed with an underscore. Names made only of letters,
        digits, underscores, spaces and dashes convert exactly as before.
        """
        # Replace spaces, dashes and any other non-word run with underscores
        name = re.sub(r'\W+', '_', name)
        # Insert underscore before uppercase letters
        name = re.sub(r'([a-z])([A-Z])', r'\1_\2', name)
        name = name.lower().strip('_')
        # Identifiers may not begin with a digit.
        if name and name[0].isdigit():
            name = f"_{name}"
        return name
|