npm - algomath-extract - Versions diffs - 1.0.0 - Mend

algomath-extract 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

package/README.md +260 -0
package/bin/algo-extract.js +143 -0
package/bin/algo-generate.js +102 -0
package/bin/algo-help.js +136 -0
package/bin/algo-list.js +56 -0
package/bin/algo-run.js +141 -0
package/bin/algo-status.js +88 -0
package/bin/algo-verify.js +189 -0
package/bin/install.js +349 -0
package/package.json +57 -0
package/requirements.txt +20 -0
package/src/__pycache__/intent.cpython-313.pyc +0 -0
package/src/cli/__pycache__/commands.cpython-313.pyc +0 -0
package/src/cli/cli_entry.py +106 -0
package/src/cli/commands.py +339 -0
package/src/execution/__init__.py +74 -0
package/src/execution/__pycache__/__init__.cpython-313.pyc +0 -0
package/src/execution/__pycache__/display.cpython-313.pyc +0 -0
package/src/execution/__pycache__/errors.cpython-313.pyc +0 -0
package/src/execution/__pycache__/executor.cpython-313.pyc +0 -0
package/src/execution/__pycache__/sandbox.cpython-313.pyc +0 -0
package/src/execution/display.py +261 -0
package/src/execution/errors.py +158 -0
package/src/execution/executor.py +253 -0
package/src/execution/sandbox.py +333 -0
package/src/extraction/__init__.py +102 -0
package/src/extraction/__pycache__/__init__.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/boundaries.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/errors.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/llm_extraction.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/notation.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/parser.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/pdf_processor.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/prompts.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/review.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/schema.cpython-313.pyc +0 -0
package/src/extraction/__pycache__/validation.cpython-313.pyc +0 -0
package/src/extraction/boundaries.py +281 -0
package/src/extraction/errors.py +156 -0
package/src/extraction/llm_extraction.py +225 -0
package/src/extraction/notation.py +240 -0
package/src/extraction/parser.py +402 -0
package/src/extraction/pdf_processor.py +281 -0
package/src/extraction/prompts.py +90 -0
package/src/extraction/review.py +298 -0
package/src/extraction/schema.py +173 -0
package/src/extraction/validation.py +202 -0
package/src/generation/__init__.py +79 -0
package/src/generation/__pycache__/__init__.cpython-313.pyc +0 -0
package/src/generation/__pycache__/code_generator.cpython-313.pyc +0 -0
package/src/generation/__pycache__/errors.cpython-313.pyc +0 -0
package/src/generation/__pycache__/hybrid.cpython-313.pyc +0 -0
package/src/generation/__pycache__/llm_generator.cpython-313.pyc +0 -0
package/src/generation/__pycache__/persistence.cpython-313.pyc +0 -0
package/src/generation/__pycache__/prompts.cpython-313.pyc +0 -0
package/src/generation/__pycache__/review.cpython-313.pyc +0 -0
package/src/generation/__pycache__/templates.cpython-313.pyc +0 -0
package/src/generation/__pycache__/types.cpython-313.pyc +0 -0
package/src/generation/__pycache__/validation.cpython-313.pyc +0 -0
package/src/generation/code_generator.py +375 -0
package/src/generation/errors.py +84 -0
package/src/generation/hybrid.py +210 -0
package/src/generation/llm_generator.py +223 -0
package/src/generation/persistence.py +221 -0
package/src/generation/prompts.py +202 -0
package/src/generation/review.py +254 -0
package/src/generation/templates.py +208 -0
package/src/generation/types.py +196 -0
package/src/generation/validation.py +278 -0
package/src/intent.py +323 -0
package/src/verification/__init__.py +63 -0
package/src/verification/__pycache__/__init__.cpython-313.pyc +0 -0
package/src/verification/__pycache__/checker.cpython-313.pyc +0 -0
package/src/verification/__pycache__/comparison.cpython-313.pyc +0 -0
package/src/verification/__pycache__/explainer.cpython-313.pyc +0 -0
package/src/verification/__pycache__/static_analysis.cpython-313.pyc +0 -0
package/src/verification/checker.py +220 -0
package/src/verification/comparison.py +492 -0
package/src/verification/explainer.py +414 -0
package/src/verification/static_analysis.py +540 -0
package/src/workflows/__init__.py +21 -0
package/src/workflows/__pycache__/__init__.cpython-313.pyc +0 -0
package/src/workflows/__pycache__/extract.cpython-313.pyc +0 -0
package/src/workflows/__pycache__/generate.cpython-313.pyc +0 -0
package/src/workflows/__pycache__/run.cpython-313.pyc +0 -0
package/src/workflows/__pycache__/verify.cpython-313.pyc +0 -0
package/src/workflows/extract.py +181 -0
package/src/workflows/generate.py +155 -0
package/src/workflows/run.py +187 -0
package/src/workflows/verify.py +334 -0

package/src/extraction/pdf_processor.py ADDED Viewed

@@ -0,0 +1,281 @@
+"""PDF and text file processing module.
+Provides functionality to extract text from PDFs and text files,
+auto-detect file types, and handle encoding issues.
+"""
+import os
+import re
+from pathlib import Path
+from typing import Optional, Tuple
+from dataclasses import dataclass
+@dataclass
+class PDFExtractionResult:
+    """Result of PDF/text extraction.
+    Every extraction path in this module returns one of these; problems are
+    reported through `success` and `error` rather than raised.
+    """
+    # Extracted text; the failure results built in this module use "".
+    text: str
+    file_type: str  # 'pdf', 'text', or 'unknown'
+    # Number of pages read for a PDF, 1 for a text file, 0 on failure.
+    page_count: int
+    # Codec name reported for the text ('utf-8' for PDFs); "" on failure.
+    encoding: str
+    # True when text was extracted without an error being reported.
+    success: bool
+    # Human-readable failure reason; left as None on success.
+    error: Optional[str] = None
+class PDFProcessor:
+    """Process PDF and text files to extract algorithm text."""
+    def __init__(self):
+        self.supported_extensions = {'.pdf', '.txt', '.md', '.markdown'}
+        self.text_extensions = {'.txt', '.md', '.markdown'}
+    def extract_text(self, file_path: str) -> PDFExtractionResult:
+        """
+        Extract text from file (PDF or text).
+        Args:
+            file_path: Path to file
+        Returns:
+            PDFExtractionResult with extracted text
+        """
+        path = Path(file_path)
+        # Check if file exists
+        if not path.exists():
+            return PDFExtractionResult(
+                text="",
+                file_type="unknown",
+                page_count=0,
+                encoding="",
+                success=False,
+                error=f"File not found: {file_path}"
+            )
+        # Check extension
+        ext = path.suffix.lower()
+        if ext not in self.supported_extensions:
+            return PDFExtractionResult(
+                text="",
+                file_type="unknown",
+                page_count=0,
+                encoding="",
+                success=False,
+                error=f"Unsupported file type: {ext}. Supported: {self.supported_extensions}"
+            )
+        # Route to appropriate handler
+        if ext == '.pdf':
+            return self._extract_pdf(file_path)
+        elif ext in self.text_extensions:
+            return self._read_text_file(file_path)
+        else:
+            return PDFExtractionResult(
+                text="",
+                file_type="unknown",
+                page_count=0,
+                encoding="",
+                success=False,
+                error=f"Unknown file type: {ext}"
+            )
+    def _extract_pdf(self, file_path: str) -> PDFExtractionResult:
+        """
+        Extract text from PDF file.
+        Uses pdfplumber for text-based PDFs.
+        Falls back to PyMuPDF for complex layouts.
+        """
+        try:
+            # Try pdfplumber first (best for text-based PDFs)
+            import pdfplumber
+            text_parts = []
+            page_count = 0
+            with pdfplumber.open(file_path) as pdf:
+                page_count = len(pdf.pages)
+                for i, page in enumerate(pdf.pages):
+                    page_text = page.extract_text()
+                    if page_text:
+                        text_parts.append(page_text)
+            full_text = '\n\n'.join(text_parts)
+            # Check if we got any text
+            if not full_text.strip():
+                # Try PyMuPDF as fallback for image-based or complex PDFs
+                return self._extract_pdf_with_pymupdf(file_path)
+            return PDFExtractionResult(
+                text=full_text,
+                file_type='pdf',
+                page_count=page_count,
+                encoding='utf-8',
+                success=True
+            )
+        except ImportError:
+            # pdfplumber not installed, try PyMuPDF
+            return self._extract_pdf_with_pymupdf(file_path)
+        except Exception as e:
+            return PDFExtractionResult(
+                text="",
+                file_type='pdf',
+                page_count=0,
+                encoding="",
+                success=False,
+                error=f"PDF extraction failed: {str(e)}"
+            )
+    def _extract_pdf_with_pymupdf(self, file_path: str) -> PDFExtractionResult:
+        """Fallback PDF extraction using PyMuPDF (fitz)."""
+        try:
+            import fitz  # PyMuPDF
+            text_parts = []
+            page_count = 0
+            with fitz.open(file_path) as doc:
+                page_count = len(doc)
+                for page in doc:
+                    text_parts.append(page.get_text())
+            full_text = '\n\n'.join(text_parts)
+            return PDFExtractionResult(
+                text=full_text,
+                file_type='pdf',
+                page_count=page_count,
+                encoding='utf-8',
+                success=True
+            )
+        except ImportError:
+            return PDFExtractionResult(
+                text="",
+                file_type='pdf',
+                page_count=0,
+                encoding="",
+                success=False,
+                error="PDF libraries not installed. Run: pip install pdfplumber pymupdf"
+            )
+        except Exception as e:
+            return PDFExtractionResult(
+                text="",
+                file_type='pdf',
+                page_count=0,
+                encoding="",
+                success=False,
+                error=f"PDF extraction failed: {str(e)}"
+            )
+    def _read_text_file(self, file_path: str) -> PDFExtractionResult:
+        """Read plain text file."""
+        try:
+            # Try UTF-8 first
+            with open(file_path, 'r', encoding='utf-8') as f:
+                text = f.read()
+            return PDFExtractionResult(
+                text=text,
+                file_type='text',
+                page_count=1,
+                encoding='utf-8',
+                success=True
+            )
+        except UnicodeDecodeError:
+            # Try other encodings
+            encodings = ['latin-1', 'cp1252', 'iso-8859-1']
+            for encoding in encodings:
+                try:
+                    with open(file_path, 'r', encoding=encoding) as f:
+                        text = f.read()
+                    return PDFExtractionResult(
+                        text=text,
+                        file_type='text',
+                        page_count=1,
+                        encoding=encoding,
+                        success=True
+                    )
+                except UnicodeDecodeError:
+                    continue
+            return PDFExtractionResult(
+                text="",
+                file_type='text',
+                page_count=0,
+                encoding="",
+                success=False,
+                error="Could not decode file. Try specifying encoding."
+            )
+        except Exception as e:
+            return PDFExtractionResult(
+                text="",
+                file_type='text',
+                page_count=0,
+                encoding="",
+                success=False,
+                error=f"Text file read failed: {str(e)}"
+            )
+    def is_text_based_pdf(self, file_path: str) -> bool:
+        """
+        Check if PDF contains extractable text vs just images.
+        Returns True if PDF has text content.
+        """
+        try:
+            import pdfplumber
+            with pdfplumber.open(file_path) as pdf:
+                for page in pdf.pages:
+                    text = page.extract_text()
+                    if text and text.strip():
+                        return True
+            return False
+        except Exception:
+            return False
+    def extract_algorithm_section(self, text: str) -> Tuple[str, int, int]:
+        """
+        Attempt to find algorithm section within extracted text.
+        Args:
+            text: Full extracted text
+        Returns:
+            Tuple of (algorithm_text, start_index, end_index)
+        """
+        # Common patterns that indicate algorithm sections
+        patterns = [
+            r'(?i)(?:algorithm|procedure|function)\s+\w+.*?\n.*?\n(?:end|return)',
+            r'(?i)(?:input|output):.*?\n.*?\n(?:end|return)',
+            r'(?i)step\s*\d+[:.]\s*.+?(?:\n\n|\Z)',
+        ]
+        for pattern in patterns:
+            matches = list(re.finditer(pattern, text, re.DOTALL))
+            if matches:
+                # Return the longest match (most likely complete algorithm)
+                longest = max(matches, key=lambda m: len(m.group()))
+                return (
+                    longest.group(),
+                    longest.start(),
+                    longest.end()
+                )
+        # If no pattern matched, return first 5000 chars (heuristic)
+        return (text[:5000], 0, min(5000, len(text)))
+# Convenience function
+def extract_text_from_file(file_path: str) -> PDFExtractionResult:
+    """Extract text from PDF or text file."""
+    processor = PDFProcessor()
+    return processor.extract_text(file_path)

package/src/extraction/prompts.py ADDED Viewed

@@ -0,0 +1,90 @@
+"""LLM prompts for algorithm extraction.
+This module provides system and user prompts for LLM-based algorithm extraction,
+designed to guide the LLM to produce structured JSON output.
+"""
+# System prompt: states the extraction rules, the allowed step types and the
+# JSON shape the model is asked to return. It is used verbatim (it is never
+# passed through str.format, so its literal braces are safe).
+EXTRACTION_SYSTEM_PROMPT = """You are an expert at parsing mathematical algorithms from natural language descriptions.
+Your task is to analyze algorithm text and extract a structured JSON representation.
+RULES:
+1. Identify the algorithm name from headers like "Algorithm X", "Procedure Y"
+2. Extract inputs and outputs from Input/Output sections
+3. Parse each step with accurate type classification
+4. Preserve line references to the original text
+5. Handle mathematical notation (Σ, Π, subscripts, superscripts)
+6. Output valid JSON only - no explanatory text
+STEP TYPES (classify each step):
+- assignment: Variable assignment (x = y, x ← y)
+- loop_for: For loops (for each, for i from, repeat n times)
+- loop_while: While loops (while condition, until condition)
+- conditional: If statements (if, when, in case)
+- return: Return statements (return x, output x, result)
+- call: Function calls (call f(), invoke)
+- comment: Annotations and explanations
+JSON OUTPUT FORMAT:
+{
+  "name": "algorithm name or 'unnamed'",
+  "description": "brief description",
+  "inputs": [{"name": "var", "type": "inferred", "description": ""}],
+  "outputs": [{"name": "var", "type": "inferred", "description": ""}],
+  "steps": [
+    {
+      "id": 1,
+      "type": "assignment",
+      "description": "human readable description",
+      "inputs": ["read vars"],
+      "outputs": ["written vars"],
+      "line_refs": [line_numbers],
+      "condition": null,
+      "body": [],
+      "else_body": [],
+      "iter_var": null,
+      "iter_range": null,
+      "expression": null,
+      "call_target": null,
+      "arguments": [],
+      "annotation": null
+    }
+  ],
+  "source_text": "original text with line numbers"
+}
+Infer types: int, float, array, matrix, bool based on context.
+Always include line_refs array showing which original lines this step came from."""
+# User prompt template: {numbered_text} is the only placeholder and is filled
+# in by format_extraction_prompt with the line-numbered algorithm text.
+EXTRACTION_USER_PROMPT_TEMPLATE = """Extract the algorithm from this mathematical text:
+```
+{numbered_text}
+```
+Provide the structured JSON representation following the schema provided.
+Return ONLY the JSON. No explanatory text outside the JSON."""
+def format_extraction_prompt(text: str) -> str:
+    """
+    Format extraction prompt with numbered text.
+    Args:
+        text: Raw algorithm text
+    Returns:
+        Formatted prompt with line numbers for traceability
+    Per D-08 from 02-CONTEXT.md.
+    """
+    lines = text.split('\n')
+    numbered_lines = []
+    for i, line in enumerate(lines, 1):
+        numbered_lines.append(f"{i:3d}: {line}")
+    numbered_text = '\n'.join(numbered_lines)
+    return EXTRACTION_USER_PROMPT_TEMPLATE.format(numbered_text=numbered_text)

package/src/extraction/review.py ADDED Viewed

@@ -0,0 +1,298 @@
+"""User review interface for algorithm extraction."""
+from typing import List, Dict, Any, Optional, Tuple
+from copy import deepcopy
+from .schema import Algorithm, Step, StepType
+def validate_step_edit(step: Step, edits: Dict[str, Any]) -> Tuple[bool, List[str]]:
+    """
+    Validate proposed edits to a step.
+    Args:
+        step: Original step
+        edits: Dictionary of proposed changes
+    Returns:
+        Tuple of (is_valid, list_of_errors)
+    Per D-20 from 02-CONTEXT.md.
+    """
+    errors = []
+    # Validate step type
+    if "type" in edits:
+        try:
+            StepType(edits["type"])
+        except ValueError:
+            errors.append(f"Invalid step type: {edits['type']}")
+    # Validate id is positive integer
+    if "id" in edits:
+        if not isinstance(edits["id"], int) or edits["id"] < 1:
+            errors.append("Step ID must be a positive integer")
+    # Validate description is non-empty
+    if "description" in edits:
+        if not edits["description"] or not str(edits["description"]).strip():
+            errors.append("Step description cannot be empty")
+    # Validate inputs and outputs are lists of strings
+    if "inputs" in edits:
+        if not isinstance(edits["inputs"], list):
+            errors.append("Inputs must be a list")
+        elif not all(isinstance(x, str) for x in edits["inputs"]):
+            errors.append("All inputs must be strings")
+    if "outputs" in edits:
+        if not isinstance(edits["outputs"], list):
+            errors.append("Outputs must be a list")
+        elif not all(isinstance(x, str) for x in edits["outputs"]):
+            errors.append("All outputs must be strings")
+    return len(errors) == 0, errors
+class ReviewInterface:
+    """
+    Interface for reviewing and editing extracted algorithms.
+    Per D-18, D-19 from 02-CONTEXT.md.
+    """
+    def __init__(self, algorithm: Algorithm):
+        self.original = algorithm
+        self.working = deepcopy(algorithm)
+        self.pending_edits: List[Dict] = []
+    def get_side_by_side(self) -> Dict[str, Any]:
+        """
+        Get side-by-side view data for UI rendering.
+        Returns:
+            Dict with original_text and structured_steps for display
+        Per D-18 from 02-CONTEXT.md.
+        """
+        return {
+            "original_text": self.original.source_text,
+            "algorithm_name": self.working.name,
+            "inputs": self.working.inputs,
+            "outputs": self.working.outputs,
+            "steps": [
+                {
+                    "id": step.id,
+                    "type": step.type.value,
+                    "description": step.description,
+                    "inputs": step.inputs,
+                    "outputs": step.outputs,
+                    "line_refs": step.line_refs
+                }
+                for step in self.working.steps
+            ],
+            "step_count": len(self.working.steps)
+        }
+    def edit_step(self, step_id: int, edits: Dict[str, Any]) -> Tuple[bool, List[str]]:
+        """
+        Edit a specific step.
+        Args:
+            step_id: ID of step to edit
+            edits: Dictionary of changes {field: new_value}
+        Returns:
+            Tuple of (success, errors)
+        Per D-19 from 02-CONTEXT.md.
+        """
+        # Find step
+        step = self._find_step(step_id)
+        if not step:
+            return False, [f"Step {step_id} not found"]
+        # Validate edits
+        is_valid, errors = validate_step_edit(step, edits)
+        if not is_valid:
+            return False, errors
+        # Apply edits
+        if "type" in edits:
+            step.type = StepType(edits["type"])
+        if "description" in edits:
+            step.description = edits["description"]
+        if "inputs" in edits:
+            step.inputs = edits["inputs"]
+        if "outputs" in edits:
+            step.outputs = edits["outputs"]
+        if "expression" in edits:
+            step.expression = edits["expression"]
+        if "condition" in edits:
+            step.condition = edits["condition"]
+        self.pending_edits.append({"action": "edit", "step_id": step_id, "edits": edits})
+        return True, []
+    def reorder_step(self, step_id: int, new_position: int) -> Tuple[bool, str]:
+        """
+        Move a step to a new position.
+        Args:
+            step_id: ID of step to move
+            new_position: New position index (1-based)
+        Returns:
+            Tuple of (success, message)
+        Per D-19 from 02-CONTEXT.md.
+        """
+        steps = self.working.steps
+        # Find current index
+        current_idx = None
+        for i, step in enumerate(steps):
+            if step.id == step_id:
+                current_idx = i
+                break
+        if current_idx is None:
+            return False, f"Step {step_id} not found"
+        # Clamp position
+        new_position = max(1, min(new_position, len(steps)))
+        new_idx = new_position - 1
+        if current_idx == new_idx:
+            return True, "No change needed"
+        # Move step
+        step = steps.pop(current_idx)
+        steps.insert(new_idx, step)
+        # Renumber all steps
+        self._renumber_steps()
+        self.pending_edits.append({"action": "reorder", "step_id": step_id, "new_position": new_position})
+        return True, f"Step moved to position {new_position}"
+    def delete_step(self, step_id: int) -> Tuple[bool, str]:
+        """
+        Delete a step.
+        Args:
+            step_id: ID of step to delete
+        Returns:
+            Tuple of (success, message)
+        Per D-19 from 02-CONTEXT.md.
+        """
+        steps = self.working.steps
+        for i, step in enumerate(steps):
+            if step.id == step_id:
+                steps.pop(i)
+                self._renumber_steps()
+                self.pending_edits.append({"action": "delete", "step_id": step_id})
+                return True, f"Step {step_id} deleted"
+        return False, f"Step {step_id} not found"
+    def add_step(self, position: int, step_data: Dict[str, Any]) -> Tuple[bool, List[str]]:
+        """
+        Add a new step at specified position.
+        Args:
+            position: Position to insert (1-based, or -1 for end)
+            step_data: Step data dictionary
+        Returns:
+            Tuple of (success, errors_or_message)
+        Per D-19 from 02-CONTEXT.md.
+        """
+        # Validate
+        temp_step = Step(id=0, type=StepType.COMMENT, description="")
+        is_valid, errors = validate_step_edit(temp_step, step_data)
+        if not is_valid:
+            return False, errors
+        # Create step
+        step = Step(
+            id=0,  # Will be renumbered
+            type=StepType(step_data.get("type", "comment")),
+            description=step_data.get("description", ""),
+            inputs=step_data.get("inputs", []),
+            outputs=step_data.get("outputs", []),
+            line_refs=[]
+        )
+        # Insert
+        steps = self.working.steps
+        if position == -1 or position > len(steps):
+            steps.append(step)
+        else:
+            steps.insert(position - 1, step)
+        self._renumber_steps()
+        self.pending_edits.append({"action": "add", "position": position, "step_data": step_data})
+        return True, [f"Step added at position {position}"]
+    def _find_step(self, step_id: int) -> Optional[Step]:
+        """Find step by ID."""
+        for step in self.working.steps:
+            if step.id == step_id:
+                return step
+        return None
+    def _renumber_steps(self):
+        """Renumber all steps sequentially."""
+        for i, step in enumerate(self.working.steps, 1):
+            step.id = i
+    def get_pending_changes(self) -> List[Dict]:
+        """Get list of pending edits."""
+        return self.pending_edits
+    def reset(self):
+        """Reset to original algorithm."""
+        self.working = deepcopy(self.original)
+        self.pending_edits = []
+    def approve(self) -> Algorithm:
+        """
+        Approve and return final algorithm.
+        Per D-22 from 02-CONTEXT.md.
+        """
+        return self.working
+def apply_edits(algorithm: Algorithm, edits: List[Dict]) -> Algorithm:
+    """
+    Apply a series of edits to an algorithm.
+    Args:
+        algorithm: Original algorithm
+        edits: List of edit operations
+    Returns:
+        Modified algorithm
+    """
+    review = ReviewInterface(algorithm)
+    for edit in edits:
+        action = edit.get("action")
+        if action == "edit":
+            review.edit_step(edit["step_id"], edit["edits"])
+        elif action == "reorder":
+            review.reorder_step(edit["step_id"], edit["new_position"])
+        elif action == "delete":
+            review.delete_step(edit["step_id"])
+        elif action == "add":
+            review.add_step(edit["position"], edit["step_data"])
+    return review.approve()