algomath-extract 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +260 -0
  2. package/bin/algo-extract.js +143 -0
  3. package/bin/algo-generate.js +102 -0
  4. package/bin/algo-help.js +136 -0
  5. package/bin/algo-list.js +56 -0
  6. package/bin/algo-run.js +141 -0
  7. package/bin/algo-status.js +88 -0
  8. package/bin/algo-verify.js +189 -0
  9. package/bin/install.js +349 -0
  10. package/package.json +57 -0
  11. package/requirements.txt +20 -0
  12. package/src/__pycache__/intent.cpython-313.pyc +0 -0
  13. package/src/cli/__pycache__/commands.cpython-313.pyc +0 -0
  14. package/src/cli/cli_entry.py +106 -0
  15. package/src/cli/commands.py +339 -0
  16. package/src/execution/__init__.py +74 -0
  17. package/src/execution/__pycache__/__init__.cpython-313.pyc +0 -0
  18. package/src/execution/__pycache__/display.cpython-313.pyc +0 -0
  19. package/src/execution/__pycache__/errors.cpython-313.pyc +0 -0
  20. package/src/execution/__pycache__/executor.cpython-313.pyc +0 -0
  21. package/src/execution/__pycache__/sandbox.cpython-313.pyc +0 -0
  22. package/src/execution/display.py +261 -0
  23. package/src/execution/errors.py +158 -0
  24. package/src/execution/executor.py +253 -0
  25. package/src/execution/sandbox.py +333 -0
  26. package/src/extraction/__init__.py +102 -0
  27. package/src/extraction/__pycache__/__init__.cpython-313.pyc +0 -0
  28. package/src/extraction/__pycache__/boundaries.cpython-313.pyc +0 -0
  29. package/src/extraction/__pycache__/errors.cpython-313.pyc +0 -0
  30. package/src/extraction/__pycache__/llm_extraction.cpython-313.pyc +0 -0
  31. package/src/extraction/__pycache__/notation.cpython-313.pyc +0 -0
  32. package/src/extraction/__pycache__/parser.cpython-313.pyc +0 -0
  33. package/src/extraction/__pycache__/pdf_processor.cpython-313.pyc +0 -0
  34. package/src/extraction/__pycache__/prompts.cpython-313.pyc +0 -0
  35. package/src/extraction/__pycache__/review.cpython-313.pyc +0 -0
  36. package/src/extraction/__pycache__/schema.cpython-313.pyc +0 -0
  37. package/src/extraction/__pycache__/validation.cpython-313.pyc +0 -0
  38. package/src/extraction/boundaries.py +281 -0
  39. package/src/extraction/errors.py +156 -0
  40. package/src/extraction/llm_extraction.py +225 -0
  41. package/src/extraction/notation.py +240 -0
  42. package/src/extraction/parser.py +402 -0
  43. package/src/extraction/pdf_processor.py +281 -0
  44. package/src/extraction/prompts.py +90 -0
  45. package/src/extraction/review.py +298 -0
  46. package/src/extraction/schema.py +173 -0
  47. package/src/extraction/validation.py +202 -0
  48. package/src/generation/__init__.py +79 -0
  49. package/src/generation/__pycache__/__init__.cpython-313.pyc +0 -0
  50. package/src/generation/__pycache__/code_generator.cpython-313.pyc +0 -0
  51. package/src/generation/__pycache__/errors.cpython-313.pyc +0 -0
  52. package/src/generation/__pycache__/hybrid.cpython-313.pyc +0 -0
  53. package/src/generation/__pycache__/llm_generator.cpython-313.pyc +0 -0
  54. package/src/generation/__pycache__/persistence.cpython-313.pyc +0 -0
  55. package/src/generation/__pycache__/prompts.cpython-313.pyc +0 -0
  56. package/src/generation/__pycache__/review.cpython-313.pyc +0 -0
  57. package/src/generation/__pycache__/templates.cpython-313.pyc +0 -0
  58. package/src/generation/__pycache__/types.cpython-313.pyc +0 -0
  59. package/src/generation/__pycache__/validation.cpython-313.pyc +0 -0
  60. package/src/generation/code_generator.py +375 -0
  61. package/src/generation/errors.py +84 -0
  62. package/src/generation/hybrid.py +210 -0
  63. package/src/generation/llm_generator.py +223 -0
  64. package/src/generation/persistence.py +221 -0
  65. package/src/generation/prompts.py +202 -0
  66. package/src/generation/review.py +254 -0
  67. package/src/generation/templates.py +208 -0
  68. package/src/generation/types.py +196 -0
  69. package/src/generation/validation.py +278 -0
  70. package/src/intent.py +323 -0
  71. package/src/verification/__init__.py +63 -0
  72. package/src/verification/__pycache__/__init__.cpython-313.pyc +0 -0
  73. package/src/verification/__pycache__/checker.cpython-313.pyc +0 -0
  74. package/src/verification/__pycache__/comparison.cpython-313.pyc +0 -0
  75. package/src/verification/__pycache__/explainer.cpython-313.pyc +0 -0
  76. package/src/verification/__pycache__/static_analysis.cpython-313.pyc +0 -0
  77. package/src/verification/checker.py +220 -0
  78. package/src/verification/comparison.py +492 -0
  79. package/src/verification/explainer.py +414 -0
  80. package/src/verification/static_analysis.py +540 -0
  81. package/src/workflows/__init__.py +21 -0
  82. package/src/workflows/__pycache__/__init__.cpython-313.pyc +0 -0
  83. package/src/workflows/__pycache__/extract.cpython-313.pyc +0 -0
  84. package/src/workflows/__pycache__/generate.cpython-313.pyc +0 -0
  85. package/src/workflows/__pycache__/run.cpython-313.pyc +0 -0
  86. package/src/workflows/__pycache__/verify.cpython-313.pyc +0 -0
  87. package/src/workflows/extract.py +181 -0
  88. package/src/workflows/generate.py +155 -0
  89. package/src/workflows/run.py +187 -0
  90. package/src/workflows/verify.py +334 -0
@@ -0,0 +1,281 @@
1
+ """Algorithm boundary detection for extraction.
2
+
3
+ Identifies algorithm sections including headers, inputs, outputs,
4
+ and step boundaries within mathematical text.
5
+
6
+ Per D-12, D-13, D-14, D-15, D-16, D-17 from 02-CONTEXT.md.
7
+ """
8
+ import re
9
+ from typing import Dict, List, Optional, Tuple, NamedTuple
10
+ from dataclasses import dataclass
11
+
12
+
13
@dataclass
class AlgorithmBoundaries:
    """Represents detected algorithm boundaries.

    All line numbers are 1-indexed into the source text; ``None`` means the
    corresponding section was not detected.
    """
    # Algorithm name, or "unnamed" when no header line was found.
    name: str
    # Line of the header that names the algorithm ("Algorithm: X", ...).
    name_line: Optional[int] = None
    # First/last line of the input ("Input:", "Given:", ...) section.
    input_start: Optional[int] = None
    input_end: Optional[int] = None
    # First/last line of the output ("Output:", "Returns:", ...) section.
    output_start: Optional[int] = None
    output_end: Optional[int] = None
    # First line of the numbered steps; steps_end is set to the last line
    # of the text by detect_algorithm_boundaries().
    steps_start: Optional[int] = None
    steps_end: Optional[int] = None
24
+
25
+
26
# Patterns for detecting algorithm headers.
# Group 1 captures the algorithm name that follows the keyword.
# NOTE(review): all call sites match with re.IGNORECASE, so the explicit
# upper-case alternatives are redundant but harmless.
HEADER_PATTERNS = [
    r'^\s*(?:Algorithm|ALGORITHM)[\s:]+([A-Za-z][A-Za-z0-9_\s]*)',
    r'^\s*(?:Procedure|PROCEDURE)[\s:]+([A-Za-z][A-Za-z0-9_\s]*)',
    r'^\s*(?:Function|FUNCTION)[\s:]+([A-Za-z][A-Za-z0-9_\s]*)',
    r'^\s*(?:Method|METHOD)[\s:]+([A-Za-z][A-Za-z0-9_\s]*)',
]

# Patterns for input sections (headers that introduce inputs/preconditions).
# `[\s:]*` allows a bare keyword line ("Inputs") as well as "Input: ...".
INPUT_PATTERNS = [
    r'^\s*(?:Input|INPUT|Inputs|INPUTS)[\s:]*',
    r'^\s*(?:Given|GIVEN)[\s:]*',
    r'^\s*(?:Parameters|PARAMETERS)[\s:]*',
    r'^\s*(?:Takes|TAKES)[\s:]*',
    r'^\s*(?:Requires|REQUIRES)[\s:]*',
    r'^\s*(?:Precondition|PRECONDITION)[\s:]*',
]

# Patterns for output sections (headers that introduce outputs/postconditions).
OUTPUT_PATTERNS = [
    r'^\s*(?:Output|OUTPUT|Outputs|OUTPUTS)[\s:]*',
    r'^\s*(?:Returns|RETURNS)[\s:]*',
    r'^\s*(?:Result|RESULT|Results|RESULTS)[\s:]*',
    r'^\s*(?:Produces|PRODUCES)[\s:]*',
    r'^\s*(?:Postcondition|POSTCONDITION)[\s:]*',
]
52
+
53
+
54
def find_algorithm_name(text: str) -> Tuple[str, Optional[int]]:
    """
    Find algorithm name from header.

    Searches for patterns like:
    - "Algorithm: Name"
    - "Algorithm Name"
    - "Procedure: Name"
    - "Function Name"

    Args:
        text: Algorithm text

    Returns:
        Tuple of (name, line_number) or ("unnamed", None)

    Per D-13 from 02-CONTEXT.md.
    """
    for idx, raw_line in enumerate(text.split('\n')):
        for pattern in HEADER_PATTERNS:
            header = re.match(pattern, raw_line, re.IGNORECASE)
            if header is None:
                continue
            # Collapse internal runs of whitespace and trim the edges.
            candidate = ' '.join(header.group(1).split())
            if candidate:
                # Report 1-indexed line numbers.
                return candidate, idx + 1

    return "unnamed", None
85
+
86
+
87
def extract_input_section(text: str) -> Tuple[Optional[int], Optional[int], List[str]]:
    """
    Extract input section from algorithm text.

    Identifies input section boundaries and returns the content.

    Args:
        text: Algorithm text

    Returns:
        Tuple of (start_line, end_line, input_descriptions)
        Lines are 1-indexed, None if not found

    Per D-15 from 02-CONTEXT.md.
    """
    lines = text.split('\n')
    start_line = None

    # Find the input section header.
    for line_num, line in enumerate(lines, 1):
        if any(re.match(p, line, re.IGNORECASE) for p in INPUT_PATTERNS):
            start_line = line_num
            break

    if not start_line:
        return None, None, []

    # Collect input descriptions until the next section or end of text.
    input_descriptions = []
    end_line = start_line

    for line_num in range(start_line, len(lines) + 1):
        line = lines[line_num - 1]

        # A section boundary after the header ends the input section.
        if line_num > start_line and _is_section_boundary(line):
            break

        if line_num == start_line:
            # Strip whichever INPUT_PATTERNS header actually matched,
            # case-insensitively. (Fixes: previously only a literal
            # "Input"/"INPUT" was stripped, so "Given:"/"Parameters:" headers
            # leaked into the descriptions, "input:" was never stripped, and
            # "Inputs:" left an "s:" residue.)
            clean_line = line
            for pattern in INPUT_PATTERNS:
                if re.match(pattern, line, re.IGNORECASE):
                    clean_line = re.sub(pattern, '', line, count=1,
                                        flags=re.IGNORECASE)
                    break
            clean_line = clean_line.strip()
            if clean_line:
                input_descriptions.append(clean_line)
        else:
            stripped = line.strip()
            if stripped:
                input_descriptions.append(stripped)

        end_line = line_num

    return start_line, end_line, input_descriptions
144
+
145
+
146
def extract_output_section(text: str) -> Tuple[Optional[int], Optional[int], List[str]]:
    """
    Extract output section from algorithm text.

    Identifies output section boundaries and returns the content.

    Args:
        text: Algorithm text

    Returns:
        Tuple of (start_line, end_line, output_descriptions)
        Lines are 1-indexed, None if not found

    Per D-16 from 02-CONTEXT.md.
    """
    lines = text.split('\n')
    start_line = None

    # Find the output section header.
    for line_num, line in enumerate(lines, 1):
        if any(re.match(p, line, re.IGNORECASE) for p in OUTPUT_PATTERNS):
            start_line = line_num
            break

    if not start_line:
        return None, None, []

    # Collect output descriptions until the next section or end of text.
    output_descriptions = []
    end_line = start_line

    for line_num in range(start_line, len(lines) + 1):
        line = lines[line_num - 1]

        # A section boundary after the header ends the output section.
        if line_num > start_line and _is_section_boundary(line):
            break

        if line_num == start_line:
            # Strip whichever OUTPUT_PATTERNS header actually matched,
            # case-insensitively. (Fixes: previously only a literal
            # "Output"/"OUTPUT" was stripped, so "Returns:"/"Produces:"
            # headers leaked into the descriptions, "output:" was never
            # stripped, and "Outputs:" left an "s:" residue.)
            clean_line = line
            for pattern in OUTPUT_PATTERNS:
                if re.match(pattern, line, re.IGNORECASE):
                    clean_line = re.sub(pattern, '', line, count=1,
                                        flags=re.IGNORECASE)
                    break
            clean_line = clean_line.strip()
            if clean_line:
                output_descriptions.append(clean_line)
        else:
            stripped = line.strip()
            if stripped:
                output_descriptions.append(stripped)

        end_line = line_num

    return start_line, end_line, output_descriptions
202
+
203
+
204
def detect_algorithm_boundaries(text: str) -> AlgorithmBoundaries:
    """
    Detect all algorithm boundaries in text.

    Per D-12, D-13, D-14 from 02-CONTEXT.md.

    Args:
        text: Algorithm text

    Returns:
        AlgorithmBoundaries with detected sections
    """
    lines = text.split('\n')

    name, name_line = find_algorithm_name(text)
    input_start, input_end, _ = extract_input_section(text)
    output_start, output_end, _ = extract_output_section(text)

    # Steps can only begin after the latest of: name header, input section,
    # output section.
    candidate = name_line or 1
    for section_end in (input_end, output_end):
        if section_end:
            candidate = max(candidate, section_end + 1)

    # Prefer the first explicitly numbered step ("1." / "1)" / "Step 1").
    step_marker = None
    for offset, line in enumerate(lines[candidate - 1:]):
        if re.match(r'^\s*\d+[.\)]\s+', line) or re.match(r'^\s*[Ss]tep\s+\d+', line):
            step_marker = candidate + offset
            break

    return AlgorithmBoundaries(
        name=name,
        name_line=name_line,
        input_start=input_start,
        input_end=input_end,
        output_start=output_start,
        output_end=output_end,
        # Fall back to the line after the detected sections when no
        # numbered step marker exists.
        steps_start=step_marker if step_marker else candidate,
        steps_end=len(lines),
    )
254
+
255
+
256
def _is_section_boundary(line: str) -> bool:
    """
    Check if line marks a section boundary.

    Returns True for:
    - Empty lines (double newline)
    - Output headers after input
    - Step indicators
    - Algorithm boundaries
    """
    # Blank (or whitespace-only) lines separate sections.
    if not line.strip():
        return True

    # Any known algorithm/input/output header starts a new section.
    for pattern in HEADER_PATTERNS + INPUT_PATTERNS + OUTPUT_PATTERNS:
        if re.match(pattern, line, re.IGNORECASE):
            return True

    # A numbered step ("1." or "1)") also ends the current section.
    return bool(re.match(r'^\s*\d+[.\)]', line))
@@ -0,0 +1,156 @@
1
+ """Extraction error types for AlgoMath."""
2
+ from typing import Optional, List
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass
class ExtractionError(Exception):
    """Base class for extraction errors."""

    # Human-readable description of what went wrong.
    message: str
    # 1-indexed line in the source text, when known.
    line_number: Optional[int] = None
    # Actionable hint for the user, when available.
    suggestion: Optional[str] = None

    def __str__(self) -> str:
        text = self.message
        if self.line_number:
            text += f" (at line {self.line_number})"
        if self.suggestion:
            text += f"\nSuggestion: {self.suggestion}"
        return text

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        return {
            "type": type(self).__name__,
            "message": self.message,
            "line_number": self.line_number,
            "suggestion": self.suggestion,
        }
30
+
31
+
32
class ParseError(ExtractionError):
    """
    Raised when text cannot be parsed.

    Per D-23 from 02-CONTEXT.md.
    """

    def __init__(
        self,
        message: str,
        line_number: Optional[int] = None,
        suggestion: Optional[str] = None,
    ):
        # Fall back to a generic hint when the caller supplies none.
        fallback = "Check syntax and try again"
        super().__init__(
            message="Parse error: " + message,
            line_number=line_number,
            suggestion=suggestion or fallback,
        )
46
+
47
+
48
class AmbiguityError(ExtractionError):
    """
    Raised when multiple valid interpretations exist.

    Per D-23 from 02-CONTEXT.md.
    """

    def __init__(
        self,
        message: str,
        line_number: Optional[int] = None,
        interpretations: Optional[List[str]] = None,
        suggestion: Optional[str] = None,
    ):
        # Fall back to a generic hint when the caller supplies none.
        fallback = "Provide more context"
        super().__init__(
            message="Ambiguity: " + message,
            line_number=line_number,
            suggestion=suggestion or fallback,
        )
        # Candidate readings of the ambiguous text, when known.
        self.interpretations = interpretations or []
64
+
65
+
66
class IncompleteError(ExtractionError):
    """
    Raised when algorithm appears incomplete.

    Per D-23 from 02-CONTEXT.md.
    """

    def __init__(
        self,
        message: str,
        line_number: Optional[int] = None,
        missing: Optional[List[str]] = None,
        suggestion: Optional[str] = None,
    ):
        # Fall back to a generic hint when the caller supplies none.
        fallback = "Add missing information"
        super().__init__(
            message="Incomplete: " + message,
            line_number=line_number,
            suggestion=suggestion or fallback,
        )
        # Names of the pieces that appear to be absent, when known.
        self.missing = missing or []
82
+
83
+
84
def categorize_error(error_text: str, line_number: Optional[int] = None) -> ExtractionError:
    """
    Categorize an error message into appropriate error type.

    Args:
        error_text: Raw error message
        line_number: Line where error occurred

    Returns:
        Categorized ExtractionError

    Per D-23 from 02-CONTEXT.md.
    """
    lowered = error_text.lower()

    def _mentions(markers) -> bool:
        # True when any marker substring occurs in the lowered message.
        return any(marker in lowered for marker in markers)

    # Order matters: parse markers are checked first, so e.g. "unexpected end"
    # is classified as a parse error rather than an incomplete one.
    if _mentions((
        "unmatched", "unexpected", "invalid syntax", "parse",
        "cannot parse", "syntax error", "malformed",
    )):
        return ParseError(
            message=error_text,
            line_number=line_number,
            suggestion="Check for matching parentheses, brackets, or quotes"
        )

    if _mentions((
        "ambiguous", "could mean", "could be", "unclear",
        "multiple interpretations", "not sure if", "could refer to",
    )):
        return AmbiguityError(
            message=error_text,
            line_number=line_number,
            suggestion="Clarify the meaning with more specific language"
        )

    if _mentions((
        "incomplete", "missing", "not found", "expected",
        "end of input", "unexpected end", "truncated",
    )):
        return IncompleteError(
            message=error_text,
            line_number=line_number,
            suggestion="Ensure the algorithm has a complete description"
        )

    # Nothing matched: fall back to the generic extraction error.
    return ExtractionError(
        message=error_text,
        line_number=line_number,
        suggestion="Review the text and try again"
    )
141
+
142
+
143
def format_errors_for_user(errors: List[ExtractionError]) -> str:
    """
    Format multiple errors into user-friendly message.

    Per D-24 from 02-CONTEXT.md.
    """
    if not errors:
        return "No errors found."

    # One numbered entry per error; each entry is preceded by a blank line
    # because of the embedded "\n" plus the join separator.
    parts = ["Extraction completed with issues:"]
    parts.extend(f"\n{idx}. {err}" for idx, err in enumerate(errors, 1))
    return "\n".join(parts)
@@ -0,0 +1,225 @@
1
+ """LLM-based extraction with hybrid fallback to rule-based parser."""
2
+
3
+ import json
4
+ import re
5
+ from typing import Optional, List, Any
6
+ from dataclasses import dataclass
7
+
8
+ from .schema import Algorithm, Step, StepType
9
+ from .parser import RuleBasedParser
10
+ from .prompts import EXTRACTION_SYSTEM_PROMPT, format_extraction_prompt
11
+
12
+
13
@dataclass
class ExtractionResult:
    """Result of extraction with metadata."""
    # Extracted algorithm; an empty Algorithm(name="unnamed") on total failure.
    algorithm: Algorithm
    # True when extraction produced a usable algorithm.
    success: bool
    method: str  # "llm" or "rule_based" ("failed" when both methods failed)
    # Accumulated error/warning messages; None is normalized to [] below.
    errors: List[str]
    # Per-step source-line references (step.line_refs for each step).
    line_references: List[List[int]]

    def __post_init__(self):
        # Allow callers to pass errors=None; normalize to an empty list.
        if self.errors is None:
            self.errors = []
25
+
26
+
27
def extract_algorithm_llm(
    text: str,
    timeout: int = 30
) -> ExtractionResult:
    """
    Extract algorithm using LLM with rule-based fallback.

    Args:
        text: Algorithm description text
        timeout: Maximum time in seconds (per D-27)

    Returns:
        ExtractionResult with algorithm and metadata

    Per D-01, D-04 from 02-CONTEXT.md.
    """
    errors: List[str] = []

    # First attempt: LLM extraction.
    try:
        response = _call_llm(
            system=EXTRACTION_SYSTEM_PROMPT,
            user=format_extraction_prompt(text),
            timeout=timeout,
        )

        extracted = _parse_llm_response(response, text) if response else None
        if extracted is not None:
            return ExtractionResult(
                algorithm=extracted,
                success=True,
                method="llm",
                errors=[],
                line_references=[step.line_refs for step in extracted.steps],
            )

        errors.append("LLM extraction returned no valid result")

    except Exception as e:
        errors.append(f"LLM extraction failed: {str(e)}")

    # Second attempt: deterministic rule-based parsing.
    try:
        extracted = RuleBasedParser().parse(text)
        return ExtractionResult(
            algorithm=extracted,
            success=True,
            method="rule_based",
            errors=errors + ["Fell back to rule-based parser"],
            line_references=[step.line_refs for step in extracted.steps],
        )
    except Exception as e:
        errors.append(f"Rule-based fallback failed: {str(e)}")

    # Both methods failed: return an empty, unsuccessful result.
    return ExtractionResult(
        algorithm=Algorithm(name="unnamed", source_text=text),
        success=False,
        method="failed",
        errors=errors,
        line_references=[],
    )
97
+
98
+
99
+ def _call_llm(system: str, user: str, timeout: int) -> Optional[str]:
100
+ """
101
+ Call LLM for extraction. Uses agent's native capabilities.
102
+
103
+ In actual implementation, this would call the AI assistant.
104
+ For now, returns None to trigger fallback.
105
+ """
106
+ # Placeholder - actual implementation would use agent
107
+ return None
108
+
109
+
110
def _parse_llm_response(response: str, original_text: str) -> Optional[Algorithm]:
    """
    Parse LLM JSON response into Algorithm object.

    Args:
        response: JSON string from LLM
        original_text: Original algorithm text

    Returns:
        Algorithm object or None if parsing fails
    """
    try:
        # Extract JSON from response (in case of markdown code blocks);
        # non-greedy match takes the first fenced block only.
        json_match = re.search(r'```(?:json)?\s*\n?(.*?)```', response, re.DOTALL)
        if json_match:
            json_str = json_match.group(1)
        else:
            # No fence: assume the whole response is JSON.
            json_str = response

        # Clean up
        json_str = json_str.strip()

        # Parse JSON
        data = json.loads(json_str)

        # Build Algorithm; every field falls back to a benign default so a
        # partially-populated response still yields an object.
        algorithm = Algorithm(
            name=data.get("name", "unnamed"),
            description=data.get("description", ""),
            source_text=original_text
        )

        # Parse inputs
        algorithm.inputs = data.get("inputs", [])

        # Parse outputs
        algorithm.outputs = data.get("outputs", [])

        # Parse steps
        steps = []
        for step_data in data.get("steps", []):
            # Unknown step types degrade to COMMENT rather than failing.
            step_type_str = step_data.get("type", "comment")
            try:
                step_type = StepType(step_type_str)
            except ValueError:
                step_type = StepType.COMMENT

            step = Step(
                # Default id is the 1-based position within the step list.
                id=step_data.get("id", len(steps) + 1),
                type=step_type,
                description=step_data.get("description", ""),
                inputs=step_data.get("inputs", []),
                outputs=step_data.get("outputs", []),
                line_refs=step_data.get("line_refs", []),
                condition=step_data.get("condition"),
                body=step_data.get("body", []),
                else_body=step_data.get("else_body", []),
                iter_var=step_data.get("iter_var"),
                iter_range=step_data.get("iter_range"),
                expression=step_data.get("expression"),
                call_target=step_data.get("call_target"),
                arguments=step_data.get("arguments", []),
                annotation=step_data.get("annotation")
            )
            steps.append(step)

        algorithm.steps = steps

        return algorithm

    except Exception:
        # Per contract, any parse/validation failure maps to None so the
        # caller can fall back to rule-based extraction.
        return None
182
+
183
+
184
class HybridExtractor:
    """
    Hybrid extractor combining rule-based and LLM extraction.

    Per D-01, D-02 from 02-CONTEXT.md.
    """

    def __init__(self):
        # Deterministic parser, always available as a fallback.
        self.rule_parser = RuleBasedParser()
        # Master switch for LLM usage.
        self.use_llm = True

    def extract(self, text: str, prefer_llm: bool = True) -> ExtractionResult:
        """
        Extract algorithm using preferred method.

        Args:
            text: Algorithm description
            prefer_llm: If True, try LLM first; else use rule-based

        Returns:
            ExtractionResult with extracted algorithm
        """
        # LLM path already includes its own rule-based fallback.
        if prefer_llm and self.use_llm:
            return extract_algorithm_llm(text)

        try:
            parsed = self.rule_parser.parse(text)
            return ExtractionResult(
                algorithm=parsed,
                success=True,
                method="rule_based",
                errors=[],
                line_references=[s.line_refs for s in parsed.steps],
            )
        except Exception as exc:
            # Parsing failed outright: report an empty, unsuccessful result.
            return ExtractionResult(
                algorithm=Algorithm(name="unnamed", source_text=text),
                success=False,
                method="failed",
                errors=[str(exc)],
                line_references=[],
            )