algomath-extract 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/README.md +260 -0
  2. package/bin/algo-extract.js +143 -0
  3. package/bin/algo-generate.js +102 -0
  4. package/bin/algo-help.js +136 -0
  5. package/bin/algo-list.js +56 -0
  6. package/bin/algo-run.js +141 -0
  7. package/bin/algo-status.js +88 -0
  8. package/bin/algo-verify.js +189 -0
  9. package/bin/install.js +349 -0
  10. package/package.json +57 -0
  11. package/requirements.txt +20 -0
  12. package/src/__pycache__/intent.cpython-313.pyc +0 -0
  13. package/src/cli/__pycache__/commands.cpython-313.pyc +0 -0
  14. package/src/cli/cli_entry.py +106 -0
  15. package/src/cli/commands.py +339 -0
  16. package/src/execution/__init__.py +74 -0
  17. package/src/execution/__pycache__/__init__.cpython-313.pyc +0 -0
  18. package/src/execution/__pycache__/display.cpython-313.pyc +0 -0
  19. package/src/execution/__pycache__/errors.cpython-313.pyc +0 -0
  20. package/src/execution/__pycache__/executor.cpython-313.pyc +0 -0
  21. package/src/execution/__pycache__/sandbox.cpython-313.pyc +0 -0
  22. package/src/execution/display.py +261 -0
  23. package/src/execution/errors.py +158 -0
  24. package/src/execution/executor.py +253 -0
  25. package/src/execution/sandbox.py +333 -0
  26. package/src/extraction/__init__.py +102 -0
  27. package/src/extraction/__pycache__/__init__.cpython-313.pyc +0 -0
  28. package/src/extraction/__pycache__/boundaries.cpython-313.pyc +0 -0
  29. package/src/extraction/__pycache__/errors.cpython-313.pyc +0 -0
  30. package/src/extraction/__pycache__/llm_extraction.cpython-313.pyc +0 -0
  31. package/src/extraction/__pycache__/notation.cpython-313.pyc +0 -0
  32. package/src/extraction/__pycache__/parser.cpython-313.pyc +0 -0
  33. package/src/extraction/__pycache__/pdf_processor.cpython-313.pyc +0 -0
  34. package/src/extraction/__pycache__/prompts.cpython-313.pyc +0 -0
  35. package/src/extraction/__pycache__/review.cpython-313.pyc +0 -0
  36. package/src/extraction/__pycache__/schema.cpython-313.pyc +0 -0
  37. package/src/extraction/__pycache__/validation.cpython-313.pyc +0 -0
  38. package/src/extraction/boundaries.py +281 -0
  39. package/src/extraction/errors.py +156 -0
  40. package/src/extraction/llm_extraction.py +225 -0
  41. package/src/extraction/notation.py +240 -0
  42. package/src/extraction/parser.py +402 -0
  43. package/src/extraction/pdf_processor.py +281 -0
  44. package/src/extraction/prompts.py +90 -0
  45. package/src/extraction/review.py +298 -0
  46. package/src/extraction/schema.py +173 -0
  47. package/src/extraction/validation.py +202 -0
  48. package/src/generation/__init__.py +79 -0
  49. package/src/generation/__pycache__/__init__.cpython-313.pyc +0 -0
  50. package/src/generation/__pycache__/code_generator.cpython-313.pyc +0 -0
  51. package/src/generation/__pycache__/errors.cpython-313.pyc +0 -0
  52. package/src/generation/__pycache__/hybrid.cpython-313.pyc +0 -0
  53. package/src/generation/__pycache__/llm_generator.cpython-313.pyc +0 -0
  54. package/src/generation/__pycache__/persistence.cpython-313.pyc +0 -0
  55. package/src/generation/__pycache__/prompts.cpython-313.pyc +0 -0
  56. package/src/generation/__pycache__/review.cpython-313.pyc +0 -0
  57. package/src/generation/__pycache__/templates.cpython-313.pyc +0 -0
  58. package/src/generation/__pycache__/types.cpython-313.pyc +0 -0
  59. package/src/generation/__pycache__/validation.cpython-313.pyc +0 -0
  60. package/src/generation/code_generator.py +375 -0
  61. package/src/generation/errors.py +84 -0
  62. package/src/generation/hybrid.py +210 -0
  63. package/src/generation/llm_generator.py +223 -0
  64. package/src/generation/persistence.py +221 -0
  65. package/src/generation/prompts.py +202 -0
  66. package/src/generation/review.py +254 -0
  67. package/src/generation/templates.py +208 -0
  68. package/src/generation/types.py +196 -0
  69. package/src/generation/validation.py +278 -0
  70. package/src/intent.py +323 -0
  71. package/src/verification/__init__.py +63 -0
  72. package/src/verification/__pycache__/__init__.cpython-313.pyc +0 -0
  73. package/src/verification/__pycache__/checker.cpython-313.pyc +0 -0
  74. package/src/verification/__pycache__/comparison.cpython-313.pyc +0 -0
  75. package/src/verification/__pycache__/explainer.cpython-313.pyc +0 -0
  76. package/src/verification/__pycache__/static_analysis.cpython-313.pyc +0 -0
  77. package/src/verification/checker.py +220 -0
  78. package/src/verification/comparison.py +492 -0
  79. package/src/verification/explainer.py +414 -0
  80. package/src/verification/static_analysis.py +540 -0
  81. package/src/workflows/__init__.py +21 -0
  82. package/src/workflows/__pycache__/__init__.cpython-313.pyc +0 -0
  83. package/src/workflows/__pycache__/extract.cpython-313.pyc +0 -0
  84. package/src/workflows/__pycache__/generate.cpython-313.pyc +0 -0
  85. package/src/workflows/__pycache__/run.cpython-313.pyc +0 -0
  86. package/src/workflows/__pycache__/verify.cpython-313.pyc +0 -0
  87. package/src/workflows/extract.py +181 -0
  88. package/src/workflows/generate.py +155 -0
  89. package/src/workflows/run.py +187 -0
  90. package/src/workflows/verify.py +334 -0
@@ -0,0 +1,158 @@
1
+ """Error categorization and translation for execution results.
2
+
3
+ Covers EXE-05 (status reporting) and EXE-06 (meaningful error messages).
4
+ Implements decisions D-17 through D-20 from 04-CONTEXT.md.
5
+ """
6
+
7
+ import re
8
+ from enum import Enum
9
+ from dataclasses import dataclass
10
+ from typing import Optional
11
+ from subprocess import TimeoutExpired
12
+
13
+
14
class ExecutionError(Enum):
    """Closed set of outcome categories for one execution (D-17).

    Every member's value is the lowercase string form of its name:

    - SYNTAX_ERROR: the code could not be parsed
    - RUNTIME_ERROR: the code failed while running
    - TIMEOUT_ERROR: the run exceeded its time limit
    - MEMORY_ERROR: the run exceeded its memory limit
    - SUCCESS: the run finished without an error
    """

    # Failure categories.
    SYNTAX_ERROR = "syntax_error"
    RUNTIME_ERROR = "runtime_error"
    TIMEOUT_ERROR = "timeout_error"
    MEMORY_ERROR = "memory_error"

    # Non-failure outcome.
    SUCCESS = "success"
29
+
30
+
31
@dataclass
class ErrorDetails:
    """User-facing and developer-facing description of one error (D-19).

    Attributes:
        category: ExecutionError member classifying the outcome.
        user_message: Plain-language description of what happened.
        hint: Suggested next step for the user (D-20).
        technical_details: Traceback or other debug text; None when absent.
        line_number: Source line associated with the error; None when unknown.
    """

    # Required fields, in positional order.
    category: ExecutionError
    user_message: str
    hint: str

    # Optional diagnostics; both default to None.
    technical_details: Optional[str] = None
    line_number: Optional[int] = None
47
+
48
+
49
class ErrorTranslator:
    """Map ExecutionError categories onto plain-language text (D-18).

    The wording is aimed at mathematicians without a programming
    background: each category gets a short description of what happened
    and a hint about what to try next.
    """

    # One entry per category: 'message' says what happened, 'hint' says
    # what the user can do about it.
    TRANSLATIONS = {
        ExecutionError.SYNTAX_ERROR: dict(
            message='Generated code has a syntax issue',
            hint='This is likely a translation error. Try regenerating with clearer pseudocode.',
        ),
        ExecutionError.TIMEOUT_ERROR: dict(
            message='Algorithm took too long to complete',
            hint='Check for infinite loops or consider optimizing the algorithm.',
        ),
        ExecutionError.MEMORY_ERROR: dict(
            message='Algorithm used too much memory',
            hint='Consider using more memory-efficient data structures or algorithms.',
        ),
        ExecutionError.RUNTIME_ERROR: dict(
            message='Algorithm encountered an error while running',
            hint='Review the technical details below for debugging information.',
        ),
        ExecutionError.SUCCESS: dict(
            message='Algorithm executed successfully',
            hint='No issues detected.',
        ),
    }

    @classmethod
    def translate(cls, error: ExecutionError, technical: str = "") -> ErrorDetails:
        """Build the ErrorDetails for a category.

        Args:
            error: Category to describe. A category missing from
                TRANSLATIONS is described with the RUNTIME_ERROR wording,
                but is still stored unchanged as ``category``.
            technical: Debug text such as a traceback. An empty value is
                stored as None.

        Returns:
            ErrorDetails carrying the message and hint for the category
            (D-18, D-20). ``line_number`` is left at its default.
        """
        table = cls.TRANSLATIONS
        fallback = table[ExecutionError.RUNTIME_ERROR]
        wording = table.get(error, fallback)

        return ErrorDetails(
            category=error,
            user_message=wording['message'],
            hint=wording['hint'],
            technical_details=technical or None,
        )
97
+
98
+
99
def categorize_error(error: Exception, stderr: str = "") -> ExecutionError:
    """Classify a failure into an ExecutionError category (D-17).

    The exception type is checked first; when it is not conclusive, the
    captured stderr text is scanned for well-known markers.

    Args:
        error: The exception that occurred.
        stderr: Standard error output from the execution. ``None`` is
            treated like an empty string.

    Returns:
        The matching ExecutionError member, or RUNTIME_ERROR when nothing
        more specific is recognised.
    """
    # 1) Exception type. The builtin TimeoutError is accepted alongside
    #    subprocess.TimeoutExpired so both kinds of timeout map to the
    #    same category.
    if isinstance(error, (TimeoutExpired, TimeoutError)):
        return ExecutionError.TIMEOUT_ERROR
    if isinstance(error, MemoryError):
        return ExecutionError.MEMORY_ERROR
    if isinstance(error, SyntaxError):
        return ExecutionError.SYNTAX_ERROR

    # 2) stderr markers. Guard against None so a missing stream does not
    #    raise AttributeError on .lower().
    stderr = stderr or ""
    stderr_lower = stderr.lower()

    # IndentationError and TabError are SyntaxError subclasses, but a
    # traceback prints the subclass name, so each one is matched explicitly
    # to stay consistent with the isinstance() check above.
    syntax_markers = ("SyntaxError", "IndentationError", "TabError")
    if any(marker in stderr for marker in syntax_markers):
        return ExecutionError.SYNTAX_ERROR
    if "MemoryError" in stderr or "out of memory" in stderr_lower:
        return ExecutionError.MEMORY_ERROR
    if "timeout" in stderr_lower or "time limit" in stderr_lower:
        return ExecutionError.TIMEOUT_ERROR

    # 3) Anything else is a generic runtime failure.
    return ExecutionError.RUNTIME_ERROR
131
+
132
+
133
def extract_line_number(traceback: str) -> Optional[int]:
    """Extract the line number of the failing frame from a traceback (D-19).

    Python prints frames with the most recent call last, so the frame in
    which the exception was raised is the final ``File "...", line N``
    entry. That entry is the one returned.

    Args:
        traceback: The full traceback text. May be empty.

    Returns:
        The line number of the last frame entry, or None when the text is
        empty or contains no frame entry.
    """
    if not traceback:
        return None

    # Collect every 'File "path", line N' entry and keep the last one:
    # earlier entries are the callers, not the place where the error was
    # raised.
    frame_lines = re.findall(r'File "[^"]+", line (\d+)', traceback)
    if not frame_lines:
        return None
    return int(frame_lines[-1])
150
+
151
+
152
# Public API of this module, in definition order.
__all__ = [
    "ExecutionError",
    "ErrorDetails",
    "ErrorTranslator",
    "categorize_error",
    "extract_line_number",
]
@@ -0,0 +1,253 @@
1
+ """High-level execution interface for AlgoMath workflows.
2
+
3
+ Per D-21, D-22, D-23, D-25: Workflow-facing execution interface that:
4
+ - Auto-triggers after code approval
5
+ - Shows progress during execution
6
+ - Can be skipped (user controls flow)
7
+ - Handles inputs and passes them to executed code
8
+ - Integrates with ContextManager for saving results
9
+
10
+ This module provides execute_code() as the primary interface for
11
+ running generated Python code within the AlgoMath workflow.
12
+ """
13
+
14
+ import json
15
+ import sys
16
+ from pathlib import Path
17
+ from dataclasses import dataclass
18
+ from typing import Optional, Dict, Any, List
19
+
20
+ from .sandbox import SandboxExecutor, ExecutionResult, ExecutionStatus
21
+
22
+
23
@dataclass
class ExecutionConfig:
    """Settings for one code execution.

    Defaults follow the project decisions: a 30 second timeout (D-05), a
    512 MB memory limit (D-02), and return-value capture switched on (D-30).

    Attributes:
        timeout: Maximum execution time, in seconds.
        max_memory_mb: Maximum memory allowed, in megabytes.
        working_dir: Optional working directory for file operations.
        capture_return_value: Whether to capture function return values.
    """

    # Resource limits.
    timeout: int = 30
    max_memory_mb: int = 512

    # Where the code runs; None means no directory was chosen.
    working_dir: Optional[Path] = None

    # Return-value capture toggle (D-30).
    capture_return_value: bool = True
41
+
42
+
43
+ def _inject_inputs(code: str, inputs: Dict[str, Any]) -> str:
44
+ """Prepend inputs as JSON and inject reading code.
45
+
46
+ Per D-29: Support stdin redirection for algorithms requiring input.
47
+ This injects a get_input() function that reads from a JSON-serialized
48
+ inputs dictionary.
49
+
50
+ Args:
51
+ code: Original Python code
52
+ inputs: Dictionary of input values
53
+
54
+ Returns:
55
+ Code with input injection wrapper prepended
56
+ """
57
+ # Serialize inputs to JSON
58
+ inputs_json = json.dumps(inputs)
59
+
60
+ # Create wrapper code that defines get_input() function
61
+ inputs_code = f'''
62
+ import json
63
+ __ALGO_INPUTS = json.loads({repr(inputs_json)})
64
+
65
+ def get_input(key: str, default: Any = None) -> Any:
66
+ """Get input value by key.
67
+
68
+ Args:
69
+ key: Input key to retrieve
70
+ default: Default value if key not found
71
+
72
+ Returns:
73
+ Input value or default
74
+ """
75
+ return __ALGO_INPUTS.get(key, default)
76
+ '''
77
+ return inputs_code + "\n\n" + code
78
+
79
+
80
def _categorize_error(result: ExecutionResult) -> ExecutionResult:
    """Rewrite ``result.error_message`` in mathematician-friendly wording.

    Per D-17 and D-18, the message is replaced according to the status:

    - TIMEOUT: reports the runtime and suggests checking for infinite loops
    - MEMORY_ERROR: reports excessive memory use
    - SYNTAX_ERROR: prefixes the existing message with a syntax-issue note
    - RUNTIME_ERROR: prefixes the existing message with a runtime-error note

    Any other status leaves the result untouched.

    Args:
        result: Raw execution result. It is modified in place.

    Returns:
        The same ``result`` object that was passed in.
    """
    status = result.status

    if status == ExecutionStatus.TIMEOUT:
        friendly = (
            f"Algorithm took too long to complete ({result.runtime_seconds:.1f}s). "
            "Check for infinite loops."
        )
    elif status == ExecutionStatus.MEMORY_ERROR:
        # The previous message, when present, is reused as the "Limit" text.
        friendly = (
            f"Algorithm used too much memory. Limit: {result.error_message or 'exceeded'}"
        )
    elif status == ExecutionStatus.SYNTAX_ERROR:
        friendly = (
            f"Generated code has a syntax issue: {result.error_message or 'Unknown error'}"
        )
    elif status == ExecutionStatus.RUNTIME_ERROR:
        friendly = (
            f"Algorithm encountered an error during execution: {result.error_message or 'Unknown error'}"
        )
    else:
        # Nothing to translate for this status.
        return result

    result.error_message = friendly
    return result
115
+
116
+
117
def execute_code(
    code: str,
    inputs: Optional[Dict[str, Any]] = None,
    config: Optional[ExecutionConfig] = None
) -> ExecutionResult:
    """Execute Python code in the sandbox and return its result.

    This is the main entry point for running generated code in the
    AlgoMath workflow (D-21). It performs, in order:

    1. Input injection (D-29) via ``_inject_inputs`` when ``inputs`` is
       non-empty.
    2. Sandboxed execution (D-01) with the configured timeout and memory
       limit.
    3. Error translation (D-17, D-18) via ``_categorize_error`` for any
       status other than SUCCESS.

    Progress display (D-23) and skipping (D-25) are not implemented in
    this function. NOTE(review): presumably the calling workflow handles
    them — confirm.

    ``config.capture_return_value`` is not consulted here.
    NOTE(review): return-value capture is presumably done by the sandbox
    wrapper — confirm against SandboxExecutor.

    Args:
        code: Python code to execute.
        inputs: Optional input dictionary exposed to the code through
            ``get_input()``. When it is None or empty, the code is run
            unchanged and ``get_input`` is not defined.
        config: Optional execution configuration; a default
            ExecutionConfig is used when omitted.

    Returns:
        ExecutionResult with status, output, and metadata. On failure the
        ``error_message`` has been rewritten in user-friendly wording.

    Example (illustrative; requires the sandbox):
        result = execute_code(
            code='print(get_input("x", 0))',
            inputs={"x": 42},
            config=ExecutionConfig(timeout=60),
        )
    """
    # Fall back to the default limits when no config is supplied.
    config = config or ExecutionConfig()

    # Only a non-empty mapping triggers injection (D-29).
    if inputs:
        code = _inject_inputs(code, inputs)

    # Run inside the sandbox with the configured limits (D-01).
    executor = SandboxExecutor(
        timeout=config.timeout,
        max_memory_mb=config.max_memory_mb
    )
    result = executor.execute(code, working_dir=config.working_dir)

    # Translate failures into user-friendly wording (D-17, D-18).
    if result.status != ExecutionStatus.SUCCESS:
        result = _categorize_error(result)

    return result
180
+
181
+
182
def format_results_for_context(result: ExecutionResult) -> Dict[str, Any]:
    """Flatten an execution result into a dict for ContextManager.save_results().

    Per D-16, execution metadata is stored alongside the outputs.

    Args:
        result: Execution result to convert.

    Returns:
        Dictionary with the keys ``status`` (the status enum's value),
        ``stdout``, ``stderr``, ``execution_time``, ``return_value``,
        ``error_type``, ``error_message`` and ``timestamp`` (local time of
        this call, ISO 8601 text).
    """
    from datetime import datetime

    record: Dict[str, Any] = {}

    # Outcome and captured streams.
    record['status'] = result.status.value
    record['stdout'] = result.stdout
    record['stderr'] = result.stderr

    # Metadata (D-16).
    record['execution_time'] = result.runtime_seconds
    record['return_value'] = result.return_value
    record['error_type'] = result.error_type
    record['error_message'] = result.error_message

    # Stamped when the record is built, not when the code ran.
    record['timestamp'] = datetime.now().isoformat()
    return record
205
+
206
+
207
def build_execution_response(
    result: ExecutionResult,
    truncate_stdout: int = 2000,
    truncate_stderr: int = 1000
) -> Dict[str, Any]:
    """Assemble the response dictionary returned by workflow functions.

    Output is shortened for display (D-15) by cutting each stream to a
    maximum number of characters; nothing is appended to mark the cut.
    The ``message`` entry summarises the outcome in user-facing wording
    (D-18).

    Args:
        result: Execution result to report.
        truncate_stdout: Maximum number of stdout characters to include.
        truncate_stderr: Maximum number of stderr characters to include.

    Returns:
        Dictionary with ``status``, ``execution_time``, ``stdout``,
        ``stderr``, ``error`` (None on success), ``return_value``,
        ``message`` and a fixed ``next_steps`` list.
    """
    # Shorten the streams; a missing or empty stream becomes ''.
    if result.stdout:
        shown_stdout = result.stdout[:truncate_stdout]
    else:
        shown_stdout = ''
    if result.stderr:
        shown_stderr = result.stderr[:truncate_stderr]
    else:
        shown_stderr = ''

    # One-line summary of the outcome (D-18).
    if result.status == ExecutionStatus.SUCCESS:
        summary = f"✓ Execution complete in {result.runtime_seconds:.3f}s"
    elif result.status == ExecutionStatus.TIMEOUT:
        summary = "⚠ Execution timed out. Check loop conditions for infinite loops."
    else:
        reason = result.error_message or result.error_type or 'Unknown error'
        summary = f"✗ Execution failed: {reason}"

    # The error text is reported only for non-successful runs.
    error_text = None
    if result.status != ExecutionStatus.SUCCESS:
        error_text = result.error_message

    follow_ups = [
        'Verify results with /algo-verify',
        'Run again with /algo-run',
        'Regenerate code with /algo-generate'
    ]

    return {
        'status': result.status.value,
        'execution_time': result.runtime_seconds,
        'stdout': shown_stdout,
        'stderr': shown_stderr,
        'error': error_text,
        'return_value': result.return_value,
        'message': summary,
        'next_steps': follow_ups
    }