algomath-extract 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +260 -0
- package/bin/algo-extract.js +143 -0
- package/bin/algo-generate.js +102 -0
- package/bin/algo-help.js +136 -0
- package/bin/algo-list.js +56 -0
- package/bin/algo-run.js +141 -0
- package/bin/algo-status.js +88 -0
- package/bin/algo-verify.js +189 -0
- package/bin/install.js +349 -0
- package/package.json +57 -0
- package/requirements.txt +20 -0
- package/src/__pycache__/intent.cpython-313.pyc +0 -0
- package/src/cli/__pycache__/commands.cpython-313.pyc +0 -0
- package/src/cli/cli_entry.py +106 -0
- package/src/cli/commands.py +339 -0
- package/src/execution/__init__.py +74 -0
- package/src/execution/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/display.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/executor.cpython-313.pyc +0 -0
- package/src/execution/__pycache__/sandbox.cpython-313.pyc +0 -0
- package/src/execution/display.py +261 -0
- package/src/execution/errors.py +158 -0
- package/src/execution/executor.py +253 -0
- package/src/execution/sandbox.py +333 -0
- package/src/extraction/__init__.py +102 -0
- package/src/extraction/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/boundaries.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/llm_extraction.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/notation.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/parser.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/pdf_processor.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/prompts.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/review.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/schema.cpython-313.pyc +0 -0
- package/src/extraction/__pycache__/validation.cpython-313.pyc +0 -0
- package/src/extraction/boundaries.py +281 -0
- package/src/extraction/errors.py +156 -0
- package/src/extraction/llm_extraction.py +225 -0
- package/src/extraction/notation.py +240 -0
- package/src/extraction/parser.py +402 -0
- package/src/extraction/pdf_processor.py +281 -0
- package/src/extraction/prompts.py +90 -0
- package/src/extraction/review.py +298 -0
- package/src/extraction/schema.py +173 -0
- package/src/extraction/validation.py +202 -0
- package/src/generation/__init__.py +79 -0
- package/src/generation/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/code_generator.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/errors.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/hybrid.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/llm_generator.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/persistence.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/prompts.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/review.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/templates.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/types.cpython-313.pyc +0 -0
- package/src/generation/__pycache__/validation.cpython-313.pyc +0 -0
- package/src/generation/code_generator.py +375 -0
- package/src/generation/errors.py +84 -0
- package/src/generation/hybrid.py +210 -0
- package/src/generation/llm_generator.py +223 -0
- package/src/generation/persistence.py +221 -0
- package/src/generation/prompts.py +202 -0
- package/src/generation/review.py +254 -0
- package/src/generation/templates.py +208 -0
- package/src/generation/types.py +196 -0
- package/src/generation/validation.py +278 -0
- package/src/intent.py +323 -0
- package/src/verification/__init__.py +63 -0
- package/src/verification/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/checker.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/comparison.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/explainer.cpython-313.pyc +0 -0
- package/src/verification/__pycache__/static_analysis.cpython-313.pyc +0 -0
- package/src/verification/checker.py +220 -0
- package/src/verification/comparison.py +492 -0
- package/src/verification/explainer.py +414 -0
- package/src/verification/static_analysis.py +540 -0
- package/src/workflows/__init__.py +21 -0
- package/src/workflows/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/extract.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/generate.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/run.cpython-313.pyc +0 -0
- package/src/workflows/__pycache__/verify.cpython-313.pyc +0 -0
- package/src/workflows/extract.py +181 -0
- package/src/workflows/generate.py +155 -0
- package/src/workflows/run.py +187 -0
- package/src/workflows/verify.py +334 -0
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Verification workflow for AlgoMath.
|
|
3
|
+
|
|
4
|
+
This module implements the verification phase, checking execution
|
|
5
|
+
results and explaining algorithm behavior.
|
|
6
|
+
|
|
7
|
+
Implements Phase 5 verification with:
|
|
8
|
+
- Execution status checking (VER-01)
|
|
9
|
+
- Expected results comparison (VER-02)
|
|
10
|
+
- Algorithm behavior explanation (VER-03)
|
|
11
|
+
- Edge case detection (VER-04)
|
|
12
|
+
- Step-level explanations (VER-05)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from typing import Dict, List, Optional, Any
|
|
16
|
+
import sys
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
import json
|
|
20
|
+
|
|
21
|
+
# Add project root to Python path
|
|
22
|
+
project_root = Path(__file__).parent.parent.parent
|
|
23
|
+
if str(project_root) not in sys.path:
|
|
24
|
+
sys.path.insert(0, str(project_root))
|
|
25
|
+
|
|
26
|
+
from src.verification import (
|
|
27
|
+
ExecutionChecker,
|
|
28
|
+
VerificationResult,
|
|
29
|
+
VerificationStatus,
|
|
30
|
+
verify_execution,
|
|
31
|
+
OutputComparator,
|
|
32
|
+
ComparisonResult,
|
|
33
|
+
ComparisonStatus,
|
|
34
|
+
prompt_for_expected,
|
|
35
|
+
compare_outputs,
|
|
36
|
+
AlgorithmExplainer,
|
|
37
|
+
ExplanationResult,
|
|
38
|
+
ExplanationLevel,
|
|
39
|
+
explain_algorithm,
|
|
40
|
+
EdgeCaseDetector,
|
|
41
|
+
EdgeCase,
|
|
42
|
+
EdgeCaseSeverity,
|
|
43
|
+
detect_edge_cases,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def show_progress(phase: str, current: int, total: int) -> str:
    """
    Generate a ten-segment progress bar string.

    Args:
        phase: Name of the current phase, used as the label prefix
        current: Current step number
        total: Total number of steps

    Returns:
        String of the form "<phase>: <bar> <pct>%". Both the bar and the
        percentage are clamped, so an out-of-range ``current`` (negative or
        greater than ``total``) renders as 0% or 100% rather than a full
        bar next to a value such as "150%". A non-positive ``total`` yields
        an empty bar at 0%.
    """
    if total <= 0:
        return f"{phase}: ░░░░░░░░░░ 0%"

    # Multiply before dividing (as the bar always did) so in-range inputs
    # truncate to exactly the same values as before; only the clamp is new
    # for the percentage.
    filled = max(0, min(int(10 * current / total), 10))
    pct = max(0, min(int(100 * current / total), 100))

    bar = '█' * filled + '░' * (10 - filled)
    return f"{phase}: {bar} {pct}%"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def run_verification(
    context: "ContextManager",
    expected: Optional[Any] = None,
    detailed: bool = False,
    diagnostic: bool = False
) -> Dict[str, Any]:
    """
    Run full verification workflow per VER-01 to VER-05.

    Loads the current session from ``context.store``, checks the recorded
    execution results, optionally compares them with ``expected``, builds
    an algorithm explanation and an edge-case list, persists the report
    and marks the context as verified.

    Args:
        context: ContextManager instance
        expected: Optional expected output for comparison
        detailed: If True, generate detailed explanation per D-06
        diagnostic: If True, run diagnostic mode per D-22, D-23

    Returns:
        Dict with verification status and results per D-17 format:

        - no stored results: 'status' is 'needs_execution', with
          'progress', 'message' and 'next_steps';
        - diagnostic requested and execution not successful: whatever
          ``_run_diagnostic`` returns;
        - otherwise: 'status' ('verified' or 'verified_with_warnings'),
          'progress', 'verification_report', 'message' and 'next_steps'
          (a list of strings only).
    """
    # Step 1: Check state and load data
    algorithm_data = context.store.load_session()
    results = algorithm_data.get('results', {})
    steps = algorithm_data.get('steps', [])
    code = algorithm_data.get('code', '')

    if not results:
        return {
            'status': 'needs_execution',
            'progress': show_progress("Verify", 1, 10),
            'message': 'No execution results found. Run code first with /algo-run',
            'next_steps': ['/algo-run', '/algo-status']
        }

    # Step 2: Verify execution status (VER-01)
    verification = ExecutionChecker(results).check()
    succeeded = verification.status == VerificationStatus.SUCCESS

    # Handle diagnostic mode per D-22, D-23
    if diagnostic and not succeeded:
        return _run_diagnostic(context, results, verification)

    # Step 3: Compare with expected results (VER-02)
    comparison = None
    if expected is not None:
        # Fall back to stdout only when there is no return value at all;
        # a falsy return value (0, False, [], '') is still the real output
        # and must be what gets compared.
        actual = results.get('return_value')
        if actual is None:
            actual = (results.get('stdout') or '').strip()
        comparison = compare_outputs(expected, actual)

    # Step 4: Explain algorithm behavior (VER-03)
    explanation = None
    if steps:
        try:
            from src.extraction.schema import Algorithm
            algorithm = Algorithm.from_dict({
                'name': algorithm_data.get('name', 'unnamed'),
                'description': algorithm_data.get('description', ''),
                'inputs': algorithm_data.get('inputs', []),
                'outputs': algorithm_data.get('outputs', []),
                'steps': steps,
                'source_text': algorithm_data.get('text', '')
            })
            level = ExplanationLevel.DETAILED if detailed else ExplanationLevel.BRIEF
            explanation = explain_algorithm(algorithm, level=level)
        except Exception:
            # Best-effort: the explanation is an optional report section,
            # so a failure here must not abort verification.
            explanation = None

    # Step 5: Detect edge cases (VER-04)
    edge_cases = []
    if code:
        try:
            edge_cases = detect_edge_cases(code)
        except Exception:
            # Best-effort for the same reason as the explanation above;
            # the report simply lists no edge cases.
            edge_cases = []

    # Step 6: Build verification report
    report = _build_verification_report(
        verification=verification,
        comparison=comparison,
        explanation=explanation,
        edge_cases=edge_cases,
        results=results
    )

    # Step 7: Persist report per D-20
    _save_verification_report(context, report)

    # Step 8: Mark as verified per D-04
    context.mark_verified()

    # The diagnostic hint is only relevant after an unsuccessful run; it is
    # omitted otherwise instead of leaving a None entry in the list.
    next_steps = [
        'Request step explanation: /algo-verify --step 1',
        'View detailed explanation: /algo-verify --detailed',
    ]
    if not succeeded:
        next_steps.append('Run diagnostic: /algo-verify --diagnostic')
    next_steps.append('Extract new algorithm: /algo-extract')

    return {
        'status': 'verified' if succeeded else 'verified_with_warnings',
        'progress': show_progress("Verify", 10, 10),
        'verification_report': report,
        'message': _format_verification_message(verification, comparison),
        'next_steps': next_steps
    }
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def verify_step(context: "ContextManager", step_id: int) -> Dict[str, Any]:
    """
    Provide detailed explanation for a specific step (VER-05).

    Args:
        context: ContextManager instance; only ``context.store.load_session()``
            is used
        step_id: ID of the step to explain, matched against each step's
            'id' entry with ``==``

    Returns:
        Dict with step explanation. 'status' is 'success' (with 'step_id'
        and 'explanation') or 'error' when the session has no steps or no
        step carries the requested id; every result has 'message' and
        'next_steps'.
    """
    algorithm_data = context.store.load_session()
    steps = algorithm_data.get('steps', [])

    if not steps:
        return {
            'status': 'error',
            'message': 'No algorithm steps found',
            'next_steps': ['/algo-extract', '/algo-status']
        }

    # Find step
    step_data = next((s for s in steps if s.get('id') == step_id), None)
    if step_data is None:
        return {
            'status': 'error',
            'message': f'Step {step_id} not found',
            'next_steps': ['/algo-status']
        }

    # Build step explanation
    explanation = _explain_single_step(step_data, algorithm_data.get('results', {}))

    return {
        'status': 'success',
        'step_id': step_id,
        'explanation': explanation,
        'message': f'Step {step_id} explanation complete',
        'next_steps': ['Explain another step: /algo-verify --step N', 'Full verification: /algo-verify']
    }
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _run_diagnostic(context, results, verification):
    """Produce the diagnostic-mode response for a failed execution (D-22, D-23).

    Reads the 'error' and 'traceback' entries of ``results`` and assembles
    a report with the failing line, the values extracted from the
    traceback and suggested fixes. ``context`` and ``verification`` are
    accepted but not read here.
    """
    failure_details = results.get('error', {})
    stack_text = results.get('traceback', '')

    # Filled in key by key, in the order the report is presented
    report = {'mode': 'diagnostic'}
    report['failure_point'] = failure_details.get('line', 'unknown')
    report['involved_values'] = _extract_involved_values(stack_text)
    report['possible_fixes'] = _suggest_fixes(failure_details)

    response = {
        'status': 'diagnostic_complete',
        'diagnostic_report': report,
        'message': 'Diagnostic analysis complete. See report for details.',
        'next_steps': ['/algo-run', '/algo-generate', '/algo-status'],
    }
    return response
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _build_verification_report(verification, comparison, explanation, edge_cases, results):
    """Assemble the structured verification report (D-17, D-20).

    Every section degrades to a placeholder when its source object is
    falsy: a missing ``verification`` gives 'Verification unavailable' /
    'unknown', missing ``explanation`` or ``comparison`` give ``None``,
    and missing ``edge_cases`` give an empty list. ``results`` is accepted
    but not read here.
    """
    if verification:
        summary = verification.execution_summary
        status_value = verification.status.value
    else:
        summary = 'Verification unavailable'
        status_value = 'unknown'

    execution_section = {
        'status': status_value,
        # getattr with a default also covers verification being None
        'runtime': getattr(verification, 'runtime_seconds', 0),
        'output_size': getattr(verification, 'output_size', 0),
        'checks_performed': verification.checks_performed if verification else [],
    }

    explanation_payload = None
    if explanation:
        explanation_payload = explanation.to_dict()

    edge_case_payload = []
    if edge_cases:
        for case in edge_cases:
            edge_case_payload.append(case.to_dict())

    comparison_payload = None
    if comparison:
        comparison_payload = comparison.to_dict()

    stamped_at = None
    if verification:
        stamped_at = verification.timestamp

    return {
        'summary': summary,
        'execution': execution_section,
        'explanation': explanation_payload,
        'edge_cases': edge_case_payload,
        'comparison': comparison_payload,
        'timestamp': stamped_at,
    }
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _format_verification_message(verification, comparison):
    """Compose the user-facing verification message (D-05, D-11).

    The first line is the execution summary, or 'Verification complete'
    when ``verification`` is falsy. When a comparison is given, one more
    line reports a match or a mismatch; any other comparison status adds
    nothing.
    """
    headline = 'Verification complete'
    if verification:
        headline = verification.execution_summary
    message_lines = [headline]

    if not comparison:
        return '\n'.join(message_lines)

    if comparison.status == ComparisonStatus.MATCH:
        message_lines.append('✓ Output matches expected results')
    elif comparison.status == ComparisonStatus.MISMATCH:
        message_lines.append('⚠ Output differs from expected — see comparison details')

    return '\n'.join(message_lines)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _explain_single_step(step_data, results):
    """Generate explanation for a single step.

    Args:
        step_data: Step dict; must contain 'id' and 'description' and may
            contain 'inputs' / 'outputs' sequences.
        results: Execution results dict; its optional 'trace' mapping is
            looked up by the step id.

    Returns:
        Text starting with "Step <id>: <description>" and a blank line,
        followed by "Inputs: ..." / "Outputs: ..." lines for the
        non-empty sequences and, when the trace has a non-empty entry for
        the step, an "Execution values: ..." line.

    Raises:
        KeyError: if ``step_data`` has no 'id' or 'description'.
    """
    step_id = step_data['id']
    parts = [f"Step {step_id}: {step_data['description']}\n\n"]

    # Entries go through str() so that non-string items (numbers, dicts)
    # are rendered instead of making join() raise TypeError.
    if step_data.get('inputs'):
        parts.append(f"Inputs: {', '.join(map(str, step_data['inputs']))}\n")
    if step_data.get('outputs'):
        parts.append(f"Outputs: {', '.join(map(str, step_data['outputs']))}\n")

    # Add execution values if available
    trace = results.get('trace') or {}
    execution_values = {}
    if isinstance(trace, dict):
        # JSON object keys are always strings, so a trace that went through
        # a JSON round trip is keyed by "1" rather than 1. Try the raw id
        # first, then its string form.
        execution_values = trace.get(step_id, trace.get(str(step_id), {}))
    if execution_values:
        parts.append(f"\nExecution values: {execution_values}\n")

    return ''.join(parts)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _extract_involved_values(traceback):
    """Return the variable values involved in a failure (D-23).

    Placeholder: the ``traceback`` argument is not inspected and the
    result is always a new empty dict.
    """
    involved = dict()
    return involved
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _suggest_fixes(error_info):
    """Suggest mathematical fixes per D-23.

    Args:
        error_info: Error dict whose 'type' entry names the exception
            (for example 'ZeroDivisionError'). May be empty or None.

    Returns:
        List of suggestion strings for the first recognised error family,
        or an empty list when the type is missing or not recognised.
    """
    raw_type = error_info.get('type') if error_info else None
    # Normalise to text: a None type, or a type stored as an exception
    # class rather than its name, would otherwise make the `in` checks
    # below raise TypeError.
    error_type = '' if raw_type is None else str(raw_type)

    # Checked in order; only the first matching family contributes.
    fix_table = (
        ('ZeroDivisionError', [
            'Check for zero values before division',
            'Consider adding a guard condition',
        ]),
        ('IndexError', [
            'Verify array bounds are within valid range',
        ]),
    )
    for marker, advice in fix_table:
        if marker in error_type:
            return list(advice)

    return []
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _save_verification_report(context, report):
    """Save verification report to file per D-20.

    Writes ``report`` as indented JSON to
    ``.algomath/algorithms/<current algorithm>/verification.log``, relative
    to the working directory. Nothing is written when the context has no
    current algorithm.

    Persistence is best-effort: any failure is recorded at DEBUG level
    and otherwise ignored, so it never interrupts verification.
    """
    import logging

    try:
        current = context.get_current()
        if not current.current_algorithm:
            return

        # Serialize before touching the file: dumping straight into a
        # handle opened with 'w' would leave a truncated log behind if a
        # value turned out not to be JSON-serializable. default=str is
        # deliberate here so values such as datetime timestamps are stored
        # as text instead of dropping the whole report.
        payload = json.dumps(report, indent=2, default=str)

        log_dir = Path('.algomath/algorithms') / current.current_algorithm
        log_dir.mkdir(parents=True, exist_ok=True)
        (log_dir / 'verification.log').write_text(payload, encoding='utf-8')
    except Exception:
        # Non-critical: keep the failure discoverable without surfacing it
        logging.getLogger(__name__).debug(
            'Could not persist verification report', exc_info=True
        )
|