algomath-extract 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/README.md +260 -0
  2. package/bin/algo-extract.js +143 -0
  3. package/bin/algo-generate.js +102 -0
  4. package/bin/algo-help.js +136 -0
  5. package/bin/algo-list.js +56 -0
  6. package/bin/algo-run.js +141 -0
  7. package/bin/algo-status.js +88 -0
  8. package/bin/algo-verify.js +189 -0
  9. package/bin/install.js +349 -0
  10. package/package.json +57 -0
  11. package/requirements.txt +20 -0
  12. package/src/__pycache__/intent.cpython-313.pyc +0 -0
  13. package/src/cli/__pycache__/commands.cpython-313.pyc +0 -0
  14. package/src/cli/cli_entry.py +106 -0
  15. package/src/cli/commands.py +339 -0
  16. package/src/execution/__init__.py +74 -0
  17. package/src/execution/__pycache__/__init__.cpython-313.pyc +0 -0
  18. package/src/execution/__pycache__/display.cpython-313.pyc +0 -0
  19. package/src/execution/__pycache__/errors.cpython-313.pyc +0 -0
  20. package/src/execution/__pycache__/executor.cpython-313.pyc +0 -0
  21. package/src/execution/__pycache__/sandbox.cpython-313.pyc +0 -0
  22. package/src/execution/display.py +261 -0
  23. package/src/execution/errors.py +158 -0
  24. package/src/execution/executor.py +253 -0
  25. package/src/execution/sandbox.py +333 -0
  26. package/src/extraction/__init__.py +102 -0
  27. package/src/extraction/__pycache__/__init__.cpython-313.pyc +0 -0
  28. package/src/extraction/__pycache__/boundaries.cpython-313.pyc +0 -0
  29. package/src/extraction/__pycache__/errors.cpython-313.pyc +0 -0
  30. package/src/extraction/__pycache__/llm_extraction.cpython-313.pyc +0 -0
  31. package/src/extraction/__pycache__/notation.cpython-313.pyc +0 -0
  32. package/src/extraction/__pycache__/parser.cpython-313.pyc +0 -0
  33. package/src/extraction/__pycache__/pdf_processor.cpython-313.pyc +0 -0
  34. package/src/extraction/__pycache__/prompts.cpython-313.pyc +0 -0
  35. package/src/extraction/__pycache__/review.cpython-313.pyc +0 -0
  36. package/src/extraction/__pycache__/schema.cpython-313.pyc +0 -0
  37. package/src/extraction/__pycache__/validation.cpython-313.pyc +0 -0
  38. package/src/extraction/boundaries.py +281 -0
  39. package/src/extraction/errors.py +156 -0
  40. package/src/extraction/llm_extraction.py +225 -0
  41. package/src/extraction/notation.py +240 -0
  42. package/src/extraction/parser.py +402 -0
  43. package/src/extraction/pdf_processor.py +281 -0
  44. package/src/extraction/prompts.py +90 -0
  45. package/src/extraction/review.py +298 -0
  46. package/src/extraction/schema.py +173 -0
  47. package/src/extraction/validation.py +202 -0
  48. package/src/generation/__init__.py +79 -0
  49. package/src/generation/__pycache__/__init__.cpython-313.pyc +0 -0
  50. package/src/generation/__pycache__/code_generator.cpython-313.pyc +0 -0
  51. package/src/generation/__pycache__/errors.cpython-313.pyc +0 -0
  52. package/src/generation/__pycache__/hybrid.cpython-313.pyc +0 -0
  53. package/src/generation/__pycache__/llm_generator.cpython-313.pyc +0 -0
  54. package/src/generation/__pycache__/persistence.cpython-313.pyc +0 -0
  55. package/src/generation/__pycache__/prompts.cpython-313.pyc +0 -0
  56. package/src/generation/__pycache__/review.cpython-313.pyc +0 -0
  57. package/src/generation/__pycache__/templates.cpython-313.pyc +0 -0
  58. package/src/generation/__pycache__/types.cpython-313.pyc +0 -0
  59. package/src/generation/__pycache__/validation.cpython-313.pyc +0 -0
  60. package/src/generation/code_generator.py +375 -0
  61. package/src/generation/errors.py +84 -0
  62. package/src/generation/hybrid.py +210 -0
  63. package/src/generation/llm_generator.py +223 -0
  64. package/src/generation/persistence.py +221 -0
  65. package/src/generation/prompts.py +202 -0
  66. package/src/generation/review.py +254 -0
  67. package/src/generation/templates.py +208 -0
  68. package/src/generation/types.py +196 -0
  69. package/src/generation/validation.py +278 -0
  70. package/src/intent.py +323 -0
  71. package/src/verification/__init__.py +63 -0
  72. package/src/verification/__pycache__/__init__.cpython-313.pyc +0 -0
  73. package/src/verification/__pycache__/checker.cpython-313.pyc +0 -0
  74. package/src/verification/__pycache__/comparison.cpython-313.pyc +0 -0
  75. package/src/verification/__pycache__/explainer.cpython-313.pyc +0 -0
  76. package/src/verification/__pycache__/static_analysis.cpython-313.pyc +0 -0
  77. package/src/verification/checker.py +220 -0
  78. package/src/verification/comparison.py +492 -0
  79. package/src/verification/explainer.py +414 -0
  80. package/src/verification/static_analysis.py +540 -0
  81. package/src/workflows/__init__.py +21 -0
  82. package/src/workflows/__pycache__/__init__.cpython-313.pyc +0 -0
  83. package/src/workflows/__pycache__/extract.cpython-313.pyc +0 -0
  84. package/src/workflows/__pycache__/generate.cpython-313.pyc +0 -0
  85. package/src/workflows/__pycache__/run.cpython-313.pyc +0 -0
  86. package/src/workflows/__pycache__/verify.cpython-313.pyc +0 -0
  87. package/src/workflows/extract.py +181 -0
  88. package/src/workflows/generate.py +155 -0
  89. package/src/workflows/run.py +187 -0
  90. package/src/workflows/verify.py +334 -0
@@ -0,0 +1,240 @@
1
+ """Mathematical notation normalization for algorithm extraction.
2
+
3
+ Transforms common mathematical notation into normalized forms
4
+ that can be parsed by rule-based and LLM extractors.
5
+
6
+ Per D-09, D-10, D-11 from 02-CONTEXT.md.
7
+ """
8
+ import re
9
+ from typing import Tuple, Optional
10
+
11
+
12
def normalize_notation(text: str) -> str:
    """
    Rewrite the mathematical notation in algorithm text into a parseable form.

    The individual passes are applied in a fixed order, each one receiving
    the output of the previous pass:
        1. Summation notation (Σ)
        2. Product notation (Π)
        3. Set membership and subset symbols (∈, ∉, ⊆, ⊇, ⊂, ⊃)
        4. Arrow notation (→, ←)
        5. Subscripts (x_i → x[i])
        6. Superscripts (x^2 → x**2)
        7. Mathematical operators (×, ÷, √, ±, ...)

    Args:
        text: Raw algorithm text with mathematical notation

    Returns:
        The text after every pass has been applied

    Per D-09, D-10 from 02-CONTEXT.md.
    """
    # Order matters: summation/product come first because their `_{...}` and
    # `^{...}` limits would otherwise be seen by the sub/superscript passes.
    passes = (
        transform_summation,
        transform_product,
        transform_set_membership,
        transform_arrow_notation,
        transform_subscripts,
        transform_superscripts,
        transform_operators,
    )

    normalized = text
    for apply_pass in passes:
        normalized = apply_pass(normalized)
    return normalized
53
+
54
+
55
def transform_summation(text: str) -> str:
    """
    Transform summation notation into normalized form.

    The operator may be written as Σ (U+03A3), the n-ary sign (U+2211) or
    the LaTeX command ``\\sum``.

    Patterns handled:
        - Σ_{i=1}^{n} f(i)   → sum over i from 1 to n of f(i)
        - Σ_{i=1}^{n} (a+b)  → sum over i from 1 to n of (a+b)
        - Σ_{i=1}^{n}        → sum over i from 1 to n
        - Σ_{i=1}^{n} Σ_{j=1}^{m} f(i,j)
              → sum over i from 1 to n of sum over j from 1 to m of f(i,j)
        - Σ_{i∈S} f(i)       → sum over i in S f(i)

    A ranged summation is only rewritten when its summand (an optional name
    followed by an optional parenthesised argument list) runs to the end of
    the line; anything else is left untouched.

    Args:
        text: Text that may contain summation notation

    Returns:
        The text with recognised summations replaced by "sum over ..." phrases

    Per D-09 from 02-CONTEXT.md.
    """
    # Alternation, not a character class: `[Σ\\sum]` would match the single
    # letters "s", "u", "m" and a lone backslash.
    sum_op = r'(?:[Σ\u2211]|\\sum)'

    # The summand is confined to the operator's own line ([ \t] and [^(\n])
    # so a summation that ends a line can never swallow the following line
    # and change the line count of the text.
    ranged = re.compile(
        sum_op + r'_\{(\w+)=([^}]+)\}\^\{([^}]+)\}'
        r'[ \t]*([^(\n]*?)(?:\(([^)\n]+)\))?$',
        re.MULTILINE,
    )

    def replace_sum(match):
        var, start, end = match.group(1, 2, 3)
        func = match.group(4).strip()
        arg = match.group(5)
        phrase = f"sum over {var} from {start} to {end}"
        if func:
            # A summand without an argument list is applied to the index
            # variable (e.g. "Σ_{i=1}^{n} f" becomes "... of f(i)").
            return f"{phrase} of {func}({arg if arg else var})"
        if arg:
            # Parenthesised summand with no leading name: keep it verbatim.
            return f"{phrase} of ({arg})"
        return phrase

    result = text

    # re.sub never rescans text it has just produced, so an inner summation
    # captured as part of an outer summand needs another pass.  Every
    # substitution removes one operator and adds none, so this terminates.
    while True:
        result, replaced = ranged.subn(replace_sum, result)
        if not replaced:
            break

    # Summation over a set
    result = re.sub(sum_op + r'_\{(\w+)\s*∈\s*(\w+)\}', r'sum over \1 in \2', result)

    return result
89
+
90
+
91
def transform_product(text: str) -> str:
    """
    Transform product notation into normalized form.

    The operator may be written as Π (U+03A0), the n-ary sign (U+220F) or
    the LaTeX command ``\\prod``.  Only the operator and its limits are
    rewritten; whatever follows them is left in place.

    Patterns handled:
        - Π_{i=1}^{n} f(i) → product over i from 1 to n f(i)
        - Π_{i∈S} f(i)     → product over i in S f(i)

    Args:
        text: Text that may contain product notation

    Returns:
        The text with recognised products replaced by "product over ..." phrases

    Per D-09 from 02-CONTEXT.md.
    """
    # Alternation, not a character class: `[Π\\prod]` would match the single
    # letters "p", "r", "o", "d" and a lone backslash, corrupting "\prod" and
    # rewriting ordinary variables such as d_{i=1}^{n}.
    prod_op = r'(?:[Π\u220f]|\\prod)'

    # Single product with range
    result = re.sub(
        prod_op + r'_\{(\w+)=([^}]+)\}\^\{([^}]+)\}',
        r'product over \1 from \2 to \3',
        text,
    )

    # Product over set
    result = re.sub(
        prod_op + r'_\{(\w+)\s*∈\s*(\w+)\}',
        r'product over \1 in \2',
        result,
    )

    return result
112
+
113
+
114
def transform_set_membership(text: str) -> str:
    """
    Replace set-relation symbols between two words with English phrases.

    Transformations:
        - x ∈ S → x in S
        - x ∉ S → x not in S
        - A ⊆ B → A is subset of B
        - A ⊂ B → A is proper subset of B
        - A ⊇ B → A is superset of B
        - A ⊃ B → A is proper superset of B

    A symbol is only rewritten when it has a word on both sides; any
    whitespace around the symbol is collapsed to single spaces.

    Args:
        text: Text that may contain set-relation symbols

    Returns:
        The text with the recognised relations spelled out

    Per D-09 from 02-CONTEXT.md.
    """
    # (symbol, phrase) pairs, applied in this order.  Natural language is used
    # for the subset/superset relations because the output feeds algorithm
    # descriptions.
    relations = (
        ('∉', 'not in'),
        ('∈', 'in'),
        ('⊆', 'is subset of'),
        ('⊂', 'is proper subset of'),
        ('⊇', 'is superset of'),
        ('⊃', 'is proper superset of'),
    )

    rewritten = text
    for symbol, phrase in relations:
        rewritten = re.sub(
            rf'(\w+)\s*{symbol}\s*(\w+)',
            rf'\1 {phrase} \2',
            rewritten,
        )
    return rewritten
143
+
144
+
145
def transform_arrow_notation(text: str) -> str:
    """
    Transform arrow notation into assignments.

    Transformations:
        - x → y → x = y (assignment)
        - x ← y → x = y (assignment)
        - x ↦ y → x maps to y

    Both assignment arrows are rewritten to `=` regardless of direction.
    Arrows are processed line by line: the left-hand word, the arrow and at
    least one character of right-hand side must sit on the same line.

    Args:
        text: Text that may contain arrow notation (possibly multi-line)

    Returns:
        The text with recognised arrows rewritten

    Per D-09 from 02-CONTEXT.md.
    """
    result = text

    # Assignment arrows.  re.MULTILINE lets `$` end the right-hand side at the
    # end of *each* line; without it only an arrow on the last line of a
    # multi-line text could satisfy the lookahead.  [ \t]* (rather than \s*)
    # keeps a match from reaching across a line break.
    for arrow in ('→', '←'):
        result = re.sub(
            rf'(\w+)[ \t]*{arrow}[ \t]*(.+?)(?=$|\s+\w+\s*=|\s+[,.])',
            r'\1 = \2',
            result,
            flags=re.MULTILINE,
        )

    # Mapping arrow
    result = re.sub(r'(\w+)[ \t]*↦[ \t]*(\w+)', r'\1 maps to \2', result)

    return result
163
+
164
+
165
def transform_subscripts(text: str) -> str:
    """
    Transform subscript notation into array indexing.

    Transformations:
        - x_i      → x[i]
        - x_10     → x[10]
        - x_{i}    → x[i]
        - A_{i,j}  → A[i][j] (one bracket pair per comma-separated index)

    An unbraced subscript must be a single letter or a run of digits that is
    not followed by another word character, so ordinary snake_case
    identifiers such as ``max_value`` are left alone.

    Args:
        text: Text that may contain subscript notation

    Returns:
        The text with recognised subscripts rewritten as index expressions

    Per D-11 from 02-CONTEXT.md.
    """
    def index_chain(match):
        # "i, j" -> "[i][j]"
        indices = re.split(r'\s*,\s*', match.group(2))
        return match.group(1) + ''.join(f'[{index}]' for index in indices)

    # Braced subscript: one or more comma-separated words.  Anything else in
    # the braces (e.g. the "i=1" limit of an untransformed Σ) is not touched.
    result = re.sub(
        r'(\w+)_\{\s*(\w+(?:\s*,\s*\w+)*)\s*\}',
        index_chain,
        text,
    )

    # Simple subscript x_i / x_10.  The (?!\w) guard stops "max_value" from
    # becoming "max[v]alue" and "x_10" from becoming "x[1]0".
    result = re.sub(r'(\w+)_(\d+|[a-zA-Z])(?!\w)', r'\1[\2]', result)

    return result
183
+
184
+
185
def transform_superscripts(text: str) -> str:
    """
    Transform superscript notation into power notation.

    Transformations:
        - x^2      → x**2
        - x^n      → x**n
        - x^{n}    → x**n
        - x^{n+1}  → x**(n+1)   (compound exponents are parenthesised)
        - x[i]^2   → x[i]**2
        - (a+b)^2  → (a+b)**2

    The caret is only rewritten when the character before it is a word
    character, `]` or `)`.

    Args:
        text: Text that may contain superscript notation

    Returns:
        The text with recognised superscripts rewritten using `**`

    Per D-11 from 02-CONTEXT.md.
    """
    # Accept an indexed or parenthesised base as well as a plain word.
    # Subscripts are normalised before superscripts, so "x_i^2" reaches this
    # function as "x[i]^2" and would otherwise never be converted.
    base = r'(?<=[\w\])])'

    def braced_power(match):
        exponent = match.group(1).strip()
        if not exponent:
            # Nothing usable inside the braces; leave the text as it was.
            return match.group(0)
        if re.fullmatch(r'\w+', exponent):
            return f'**{exponent}'
        # Parenthesise so the whole expression stays the exponent.
        return f'**({exponent})'

    # Braced superscript x^{...}
    result = re.sub(base + r'\^\{([^{}]+)\}', braced_power, text)

    # Simple superscript: a run of digits or a single letter
    result = re.sub(base + r'\^(\d+|[a-zA-Z])', r'**\1', result)

    return result
206
+
207
+
208
def transform_operators(text: str) -> str:
    """
    Replace mathematical operator symbols with ASCII equivalents.

    Transformations:
        - ≤ → <=
        - ≥ → >=
        - ≠ → !=
        - ≈ → ~=
        - × → *
        - ÷ → /
        - ± → +/-
        - √x → sqrt(x)      (x may contain word characters and brackets)
        - √{expr} → sqrt(expr)

    Args:
        text: Text that may contain operator symbols

    Returns:
        The text with the recognised symbols replaced

    Per D-10 from 02-CONTEXT.md.
    """
    # One-character symbols are swapped in a single translate() pass.
    symbol_table = str.maketrans({
        # Comparison operators
        '≤': '<=',
        '≥': '>=',
        '≠': '!=',
        '≈': '~=',
        # Arithmetic operators
        '×': '*',
        '÷': '/',
        '±': '+/-',
    })
    converted = text.translate(symbol_table)

    # Square root: bare operand first, then the braced form.
    for root_pattern in (r'√([\w\[\]]+)', r'√\{([^}]+)\}'):
        converted = re.sub(root_pattern, r'sqrt(\1)', converted)

    return converted
@@ -0,0 +1,402 @@
1
+ """Rule-based parser for algorithm extraction.
2
+
3
+ Uses pattern matching and heuristics to extract structured
4
+ algorithm steps from mathematical text descriptions.
5
+
6
+ Per D-02 from 02-CONTEXT.md.
7
+ """
8
+ import re
9
+ from typing import List, Optional, Dict, Any, Tuple
10
+
11
+ from .schema import Algorithm, Step, StepType
12
+ from .notation import normalize_notation
13
+ from .boundaries import (
14
+ find_algorithm_name,
15
+ extract_input_section,
16
+ extract_output_section,
17
+ AlgorithmBoundaries,
18
+ detect_algorithm_boundaries
19
+ )
20
+
21
+
22
class RuleBasedParser:
    """
    Parser using regex rules to extract algorithm steps.

    Integrates with notation normalization and boundary detection
    to produce structured algorithm representations.

    Per D-02, D-04 from 02-CONTEXT.md.
    """

    def __init__(self):
        # Step detection patterns, tried in order against each raw line.
        # Each entry pairs a regex with the handler that builds the Step.
        self.step_patterns = [
            # Numbered steps: "1. Do something" or "1) Do something"
            (r'^(?:\s*)(\d+)[.\)]\s*(.+)$', self._parse_numbered_step),
            # Step keyword: "Step 1: Do something"
            (r'^(?:\s*)[Ss]tep\s*(\d+)[:.\)]\s*(.+)$', self._parse_numbered_step),
            # Bullet points as steps
            (r'^(?:\s*)[-*•]\s*(.+)$', self._parse_bullet_step),
        ]

        # Step type detection patterns; the first match wins.
        self.type_patterns = [
            (r'^\s*[Rr]eturn', StepType.RETURN),
            (r'^\s*[Oo]utput', StepType.RETURN),
            (r'^\s*[Ff]or\s+each', StepType.LOOP_FOR),
            (r'^\s*[Ff]or\s+\w+\s+(?:from|in|=)', StepType.LOOP_FOR),
            (r'^\s*[Ff]or\s*\(', StepType.LOOP_FOR),
            (r'^\s*[Rr]epeat', StepType.LOOP_FOR),
            (r'^\s*[Ww]hile', StepType.LOOP_WHILE),
            (r'^\s*[Uu]ntil', StepType.LOOP_WHILE),
            (r'^\s*[Ii]f', StepType.CONDITIONAL),
            (r'^\s*[Ww]hen', StepType.CONDITIONAL),
            (r'^\s*[Cc]all\s+\w+\s*\(', StepType.CALL),
            (r'^\s*[Ii]nvoke', StepType.CALL),
        ]

    def parse(self, text: str, name: Optional[str] = None) -> "Algorithm":
        """
        Parse text into an Algorithm.

        Args:
            text: Raw algorithm text
            name: Optional algorithm name (auto-detected if not provided)

        Returns:
            Algorithm object with extracted steps

        Per D-02, D-04 from 02-CONTEXT.md.
        """
        # Normalize mathematical notation
        normalized = normalize_notation(text)

        # Detect boundaries
        # NOTE(review): boundaries are computed on the raw text but applied to
        # the lines of the normalized text in _parse_steps, so this assumes
        # normalization does not add or remove lines - confirm.
        boundaries = detect_algorithm_boundaries(text)

        # Determine algorithm name: explicit argument, then the detected
        # name, then a fixed placeholder.
        if not name:
            name = boundaries.name
        if not name:
            name = "unnamed"

        # Extract inputs and outputs (only the description lists are used)
        _, _, input_descs = extract_input_section(text)
        _, _, output_descs = extract_output_section(text)

        # Parse steps
        steps = self._parse_steps(normalized, boundaries)

        return Algorithm(
            name=name,
            inputs=self._parse_inputs(input_descs),
            outputs=self._parse_outputs(output_descs),
            steps=steps,
            source_text=text
        )

    def _parse_steps(self, text: str, boundaries: "AlgorithmBoundaries") -> List["Step"]:
        """
        Parse steps from normalized text.

        Lines matching one of ``self.step_patterns`` are handed to that
        pattern's handler.  Any other non-blank line longer than 10
        characters that is not a section header also becomes a step.

        Args:
            text: Normalized text
            boundaries: Detected boundaries; ``steps_start`` / ``steps_end``
                are used as 1-based inclusive line numbers, defaulting to the
                whole text when falsy

        Returns:
            List of Step objects, with ids numbered from 1 in order of appearance
        """
        lines = text.split('\n')
        steps = []
        step_id = 1

        # Determine which lines to parse
        start_line = boundaries.steps_start or 1
        end_line = boundaries.steps_end or len(lines)

        for line_num in range(start_line, min(end_line + 1, len(lines) + 1)):
            line = lines[line_num - 1]
            stripped = line.strip()

            if not stripped:
                continue

            # Try to match step patterns; the first matching pattern decides.
            matched = False
            for pattern, handler in self.step_patterns:
                match = re.match(pattern, line)
                if match:
                    step = handler(match, step_id, line_num)
                    if step:
                        steps.append(step)
                        step_id += 1
                    matched = True
                    break

            # If no pattern matched but line looks like a step
            if not matched and len(stripped) > 10 and not self._is_section_header(stripped):
                # Classify unnumbered lines the same way numbered ones are, so
                # "while ..." / "return ..." are not all labelled ASSIGNMENT.
                # Lines with no recognisable type keep ASSIGNMENT as before.
                step_type = self._classify_step_type(stripped)
                if step_type == StepType.COMMENT:
                    step_type = StepType.ASSIGNMENT
                steps.append(self._create_step(step_id, step_type, stripped, line_num))
                step_id += 1

        return steps

    def _parse_numbered_step(self, match, step_id: int, line_num: int) -> Optional["Step"]:
        """Build a Step from a numbered-step match ("1. ..." or "Step 1: ...")."""
        # For numbered pattern, group 2 contains the content
        text = match.group(2).strip() if len(match.groups()) > 1 else match.group(1).strip()

        step_type = self._classify_step_type(text)
        return self._create_step(step_id, step_type, text, line_num)

    def _parse_bullet_step(self, match, step_id: int, line_num: int) -> Optional["Step"]:
        """Build a Step from a bullet-point match; group 1 holds the content."""
        text = match.group(1).strip()
        step_type = self._classify_step_type(text)
        return self._create_step(step_id, step_type, text, line_num)

    def _create_step(self, step_id: int, step_type: "StepType", text: str, line_num: int) -> "Step":
        """
        Create a step with extracted metadata.

        Args:
            step_id: Sequential id assigned to the step
            step_type: Classification that selects which detail is extracted
            text: Step description
            line_num: Line number recorded in ``line_refs``

        Returns:
            Step with variables plus the one type-specific field filled in
        """
        inputs, outputs = self._extract_variables(text)

        # Extract additional fields based on type; the rest stay None.
        condition = None
        expression = None
        iter_var = None
        iter_range = None

        if step_type == StepType.LOOP_FOR:
            iter_var, iter_range = self._extract_for_loop_details(text)
        elif step_type == StepType.LOOP_WHILE:
            condition = self._extract_while_condition(text)
        elif step_type == StepType.CONDITIONAL:
            condition = self._extract_if_condition(text)
        elif step_type == StepType.RETURN:
            expression = self._extract_return_value(text)
        elif step_type == StepType.ASSIGNMENT:
            expression = self._extract_assignment_expression(text)

        return Step(
            id=step_id,
            type=step_type,
            description=text,
            inputs=inputs,
            outputs=outputs,
            line_refs=[line_num],
            condition=condition,
            expression=expression,
            iter_var=iter_var,
            iter_range=iter_range
        )

    def _classify_step_type(self, text: str) -> "StepType":
        """
        Classify step type from text using patterns.

        ``self.type_patterns`` are tried in order against the lower-cased
        text.  If none match, the text is an ASSIGNMENT when it contains
        ``=``, ``←``, " is ", " gets " or " set to ", a CALL when it contains
        something shaped like ``name(...)``, and a COMMENT otherwise.
        """
        text_lower = text.lower().strip()

        for pattern, step_type in self.type_patterns:
            if re.search(pattern, text_lower):
                return step_type

        # Check for assignment
        if re.search(r'[=←]|\s+is\s+|\s+gets\s+|\s+set\s+to\s+', text_lower):
            return StepType.ASSIGNMENT

        # Check for function call
        if re.search(r'\w+\s*\([^)]*\)', text_lower):
            return StepType.CALL

        return StepType.COMMENT

    def _extract_variables(self, text: str) -> Tuple[List[str], List[str]]:
        """
        Extract input and output variables from step text.

        The output is the target of the first assignment form found (at most
        one).  Inputs are every other identifier-shaped word in the text, in
        order of appearance and including repeats, minus control keywords.

        Args:
            text: Step description

        Returns:
            Tuple of (input_vars, output_vars)
        """
        inputs = []
        outputs = []

        # Find assignments: x = ..., x ← ..., x gets ..., etc.
        assign_patterns = [
            r'(?:initialize|set)\s+(\w+)',
            r'(\w+)\s*[=←]',
            r'(\w+)\s+is\s+set\s+to',
            r'(\w+)\s+gets',
        ]

        for pattern in assign_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                outputs.append(match.group(1))
                break

        # Find all variable references (avoid keywords)
        keywords = {'for', 'while', 'if', 'else', 'return', 'output',
                    'end', 'then', 'do', 'in', 'to', 'from', 'and', 'or'}

        var_pattern = r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b'
        for match in re.finditer(var_pattern, text):
            var = match.group(1)
            # Compare case-insensitively: step text keeps its original case,
            # so "For" / "If" / "Return" must be filtered like "for" / "if".
            if var.lower() not in keywords and var not in outputs:
                inputs.append(var)

        return inputs, outputs

    def _extract_for_loop_details(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """
        Extract iteration variable and range from for loop.

        Returns:
            (variable, range text), or (None, None) when neither form matches
        """
        # Pattern: for i from 1 to n
        # NOTE(review): the capture stops at " to", so "for i from 1 to n"
        # yields the range "1" and the upper bound is not returned.  Left
        # unchanged because consumers of iter_range are not visible here -
        # confirm whether "1 to n" is what they need.
        match = re.search(r'[Ff]or\s+(\w+)\s+(?:from|in)\s+(.+?)(?:\s+to|\s+do|\s*:|$)', text)
        if match:
            return match.group(1), match.group(2)

        # Pattern: for each x in S
        match = re.search(r'[Ff]or\s+each\s+(\w+)\s+in\s+(\w+)', text)
        if match:
            return match.group(1), match.group(2)

        return None, None

    def _extract_while_condition(self, text: str) -> Optional[str]:
        """Return the text between "while" and a trailing ":" / "do" / end, or None."""
        match = re.search(r'[Ww]hile\s+(.+?)(?:\s*:|\s+do|\s*$)', text)
        if match:
            return match.group(1).strip()
        return None

    def _extract_if_condition(self, text: str) -> Optional[str]:
        """Return the text between "if" and a trailing ":" / "then" / end, or None."""
        match = re.search(r'[Ii]f\s+(.+?)(?:\s*:|\s+then|\s*$)', text)
        if match:
            return match.group(1).strip()
        return None

    def _extract_return_value(self, text: str) -> Optional[str]:
        """Return what follows "return" (or, failing that, "output"), or None."""
        match = re.search(r'[Rr]eturn\s+(.+)$', text)
        if match:
            return match.group(1).strip()
        # Also match Output
        match = re.search(r'[Oo]utput\s+(.+)$', text)
        if match:
            return match.group(1).strip()
        return None

    def _extract_assignment_expression(self, text: str) -> Optional[str]:
        """Return what follows the first "=" or "←" in the text, or None."""
        match = re.search(r'[=←]\s*(.+)$', text)
        if match:
            return match.group(1).strip()
        return None

    def _is_section_header(self, text: str) -> bool:
        """Check whether text starts with a section-header word (case-insensitive)."""
        header_patterns = [
            r'^(?:Input|Output|Algorithm|Procedure|Function|Method)',
            r'^(?:Given|Parameters|Returns|Result)',
        ]
        return any(
            re.match(pattern, text, re.IGNORECASE)
            for pattern in header_patterns
        )

    def _parse_io_descriptions(self, descriptions: List[str]) -> List[Dict[str, Any]]:
        """
        Turn free-text variable descriptions into name/type/description dicts.

        A description shaped like "A[1..n] - array of integers" is split into
        a name (with optional bracketed suffix) and the remaining text, from
        which the type is inferred.  Anything else is used whole as the name
        with type "unknown".
        """
        parsed = []

        for desc in descriptions:
            # Name, then a "-", "," or whitespace separator, then the rest
            match = re.search(r'(\w+(?:\[[^\]]*\])?)\s*(?:-|,|\s)\s*(.+)', desc)
            if match:
                name = match.group(1)
                var_type = self._infer_type(match.group(2))
            else:
                # Just variable name
                name = desc.strip()
                var_type = "unknown"

            parsed.append({
                "name": name,
                "type": var_type,
                "description": desc
            })

        return parsed

    def _parse_inputs(self, input_descriptions: List[str]) -> List[Dict[str, Any]]:
        """
        Parse input descriptions into structured format.

        Returns:
            One dict per description with "name", "type" and "description" keys

        Per D-15 from 02-CONTEXT.md.
        """
        return self._parse_io_descriptions(input_descriptions)

    def _parse_outputs(self, output_descriptions: List[str]) -> List[Dict[str, Any]]:
        """
        Parse output descriptions into structured format.

        Returns:
            One dict per description with "name", "type" and "description" keys

        Per D-16 from 02-CONTEXT.md.
        """
        return self._parse_io_descriptions(output_descriptions)

    def _infer_type(self, description: str) -> str:
        """
        Infer variable type from description.

        Returns:
            One of "matrix", "array", "int", "float", "bool", "str" or
            "unknown"; earlier categories in that checking order win.
        """
        desc_lower = description.lower()

        def mentions(*fragments: str) -> bool:
            # Anchor every keyword at the start of a word.  Plain substring
            # tests misfire on ordinary words: "floating point" contains
            # "int", "context" contains "text", "portable" contains "table".
            return re.search(r'\b(?:' + '|'.join(fragments) + r')', desc_lower) is not None

        if mentions('array', 'list', 'sequence'):
            if mentions('matrix', '2d', 'two-dimensional'):
                return "matrix"
            return "array"
        if mentions('matrix', 'grid', 'table'):
            return "matrix"
        # Short keywords also need a closing boundary so that "into",
        # "really" or "texture" do not count.
        if mentions('integer', r'ints?\b', 'whole number'):
            return "int"
        if mentions('float', r'reals?\b', 'decimal', 'number'):
            return "float"
        if mentions('boolean', 'bool', 'true', 'false'):
            return "bool"
        if mentions('string', r'texts?\b'):
            return "str"

        return "unknown"
388
+
389
+
390
def parse_algorithm(text: str, name: Optional[str] = None) -> Algorithm:
    """
    Parse algorithm text with a freshly constructed RuleBasedParser.

    Args:
        text: Algorithm description
        name: Optional algorithm name, forwarded to the parser unchanged

    Returns:
        The Algorithm produced by RuleBasedParser.parse
    """
    return RuleBasedParser().parse(text, name)