PyPI - paper2torch - Versions diffs - 0.1.0__tar.gz - Mend

paper2torch 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

paper2torch-0.1.0/PKG-INFO +26 -0
paper2torch-0.1.0/README.md +0 -0
paper2torch-0.1.0/paper2torch/__init__.py +3 -0
paper2torch-0.1.0/paper2torch/cli.py +104 -0
paper2torch-0.1.0/paper2torch/extractor.py +54 -0
paper2torch-0.1.0/paper2torch/generator.py +94 -0
paper2torch-0.1.0/paper2torch/parser.py +50 -0
paper2torch-0.1.0/paper2torch/validator.py +76 -0
paper2torch-0.1.0/paper2torch.egg-info/PKG-INFO +26 -0
paper2torch-0.1.0/paper2torch.egg-info/SOURCES.txt +14 -0
paper2torch-0.1.0/paper2torch.egg-info/dependency_links.txt +1 -0
paper2torch-0.1.0/paper2torch.egg-info/entry_points.txt +2 -0
paper2torch-0.1.0/paper2torch.egg-info/requires.txt +5 -0
paper2torch-0.1.0/paper2torch.egg-info/top_level.txt +1 -0
paper2torch-0.1.0/setup.cfg +4 -0
paper2torch-0.1.0/setup.py +35 -0

paper2torch-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,26 @@
+Metadata-Version: 2.4
+Name: paper2torch
+Version: 0.1.0
+Summary: Convert research papers to PyTorch code using LLMs
+Home-page: https://github.com/karmagodjs/paper2torch
+Author: Dhruv Kumar
+Author-email: rafftarsingh7982@gmail.com
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: groq
+Requires-Dist: pymupdf
+Requires-Dist: click
+Requires-Dist: rich
+Requires-Dist: python-dotenv
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

paper2torch-0.1.0/README.md ADDED Viewed

File without changes

paper2torch-0.1.0/paper2torch/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# Package metadata; the version string matches the one declared in setup.py.
+__version__ = "0.1.0"
+__author__ = "Dhruv Kumar"
+__description__ = "Convert research papers to PyTorch code"

paper2torch-0.1.0/paper2torch/cli.py ADDED Viewed

@@ -0,0 +1,104 @@
+import click
+import os
+import json
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn
+# naya
+from paper2torch.parser import extract_text_from_pdf, extract_relevant_sections
+from paper2torch.extractor import extract_sections, get_core_content
+from paper2torch.generator import generate_pytorch_code
+from paper2torch.validator import validate_code
+console = Console()
+@click.command()
+@click.argument('pdf_path')
+@click.option('--output', '-o', default='./output', help='Output folder path')
+@click.option('--title', '-t', default='Unknown Paper', help='Paper title')
+def main(pdf_path, output, title):
+    """
+    paper2torch — Convert research papers to PyTorch code.
+    Usage: paper2torch paper.pdf --output ./generated
+    """
+    # NOTE: the docstring above doubles as the --help text rendered by click,
+    # so it is kept short. Pipeline: parse the PDF -> extract sections ->
+    # generate code with the LLM -> validate -> write model.py, config.py and
+    # README_generated.md into `output`. The process exits with status 1 when
+    # the PDF is missing or no text could be extracted from it.
+    console.print(Panel.fit(
+        "[bold red]paper2torch[/bold red] — Research Paper to PyTorch",
+        border_style="red"
+    ))
+    # The spinner is transient: it disappears when the block ends, while the
+    # status lines printed through `console` stay on screen.
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        transient=True
+    ) as progress:
+        # Step 1: parse the PDF.
+        task = progress.add_task("Parsing PDF...", total=None)
+        try:
+            pdf_data = extract_text_from_pdf(pdf_path)
+        except FileNotFoundError:
+            console.print(f"[red]✗ PDF not found: {pdf_path}[/red]")
+            # Non-zero status so shells and CI can detect the failure.
+            raise SystemExit(1) from None
+        console.print(f"[green]✓[/green] PDF parsed — {pdf_data['total_pages']} pages found")
+        # Step 2: extract the sections that matter for code generation.
+        progress.update(task, description="Extracting sections...")
+        sections = extract_sections(pdf_data['full_text'])
+        core_content = get_core_content(sections)
+        if not core_content.strip():
+            # No section header was recognised: fall back to the beginning of
+            # the raw text, using the same 8000-character budget that
+            # get_core_content applies by default.
+            core_content = pdf_data['full_text'][:8000]
+        if not core_content.strip():
+            # Nothing to send to the LLM (e.g. a PDF without a text layer).
+            console.print("[red]✗ No text could be extracted from the PDF[/red]")
+            raise SystemExit(1)
+        console.print(f"[green]✓[/green] Sections extracted — {len(core_content)} chars")
+        # Step 3: generate the PyTorch code (the generator module calls Groq).
+        progress.update(task, description="Generating PyTorch code with Groq...")
+        result = generate_pytorch_code(core_content, title)
+        console.print("[green]✓[/green] PyTorch code generated")
+        # Step 4: validate the generated model code.
+        progress.update(task, description="Validating code...")
+        validation = validate_code(result['model_code'])
+        if validation['passed']:
+            console.print("[green]✓[/green] Validation passed")
+        else:
+            console.print(f"[yellow]⚠[/yellow] Validation: {validation['summary']}")
+    # Step 5: save the outputs.
+    os.makedirs(output, exist_ok=True)
+    # Save model.py.
+    model_path = os.path.join(output, 'model.py')
+    with open(model_path, 'w', encoding='utf-8') as f:
+        f.write(result['model_code'])
+    # Save config.py.
+    config_path = os.path.join(output, 'config.py')
+    with open(config_path, 'w', encoding='utf-8') as f:
+        f.write(result['config_code'])
+    # Save the generated README with the architecture summary and the
+    # validation outcome.
+    readme_path = os.path.join(output, 'README_generated.md')
+    with open(readme_path, 'w', encoding='utf-8') as f:
+        f.write(f"# {title}\n\n")
+        f.write("## Generated by paper2torch\n\n")
+        f.write("### Architecture Info\n\n")
+        f.write(f"```json\n{result['architecture_info']}\n```\n\n")
+        f.write("### Validation\n\n")
+        f.write(f"- Status: {'✓ Passed' if validation['passed'] else '⚠ Issues found'}\n")
+        f.write(f"- Summary: {validation['summary']}\n")
+    console.print(Panel.fit(
+        f"[bold green]Done![/bold green]\n\n"
+        f"Files saved to [cyan]{output}[/cyan]\n"
+        f"  • model.py\n"
+        f"  • config.py\n"
+        f"  • README_generated.md",
+        border_style="green"
+    ))
+if __name__ == "__main__":
+    main()

paper2torch-0.1.0/paper2torch/extractor.py ADDED Viewed

@@ -0,0 +1,54 @@
+import re
+def extract_sections(full_text: str) -> dict:
+    """
+    Split the paper text into coarse sections keyed by section name.
+
+    A line is treated as a section header only when it looks like one:
+    it is short (< 60 characters, at most 6 words), does not end with a
+    period, does not start with a lowercase letter, and begins with a
+    known section keyword, optionally preceded by numbering such as
+    "3", "3.1", "3." or "III.". Matching the keyword anywhere in a short
+    line is not enough, because ordinary body lines in two-column PDFs
+    are usually shorter than 60 characters too.
+
+    Every line from a header up to the next recognised header (the header
+    line included) is appended to that section. Text before the first
+    header is discarded.
+
+    Returns a dict with the keys "abstract", "introduction",
+    "methodology", "architecture", "experiments" and "conclusion"; a
+    section that was not found maps to an empty string.
+    """
+    section_patterns = {
+        "abstract": r"abstract",
+        "introduction": r"introduction",
+        "methodology": r"(method|methodology|approach|proposed method)",
+        "architecture": r"(architecture|model architecture|network architecture)",
+        "experiments": r"(experiment|evaluation|results)",
+        "conclusion": r"(conclusion|conclusions)"
+    }
+    # Optional section numbering in front of the title: digits with optional
+    # dotted sub-levels ("3", "3.1", "3.") or a Roman numeral with a period.
+    numbering = r"(?:(?:\d+(?:\.\d+)*\.?|[ivxlcdm]+\.)\s+)?"
+    header_res = {
+        section: re.compile(numbering + pattern)
+        for section, pattern in section_patterns.items()
+    }
+    # Collect lines per section and join once at the end instead of growing
+    # strings with += inside the loop.
+    collected = {section: [] for section in section_patterns}
+    current_section = None
+    for line in full_text.split('\n'):
+        stripped = line.strip()
+        looks_like_header = (
+            0 < len(stripped) < 60
+            and len(stripped.split()) <= 6
+            and not stripped.endswith('.')
+            and not stripped[0].islower()
+        )
+        if looks_like_header:
+            lowered = stripped.lower()
+            for section, header_re in header_res.items():
+                # match() anchors at the start of the line.
+                if header_re.match(lowered):
+                    current_section = section
+                    break
+        if current_section:
+            collected[current_section].append(line + "\n")
+    return {section: "".join(parts) for section, parts in collected.items()}
+def get_core_content(sections: dict, max_chars: int = 8000) -> str:
+    """
+    Build the text that is fed to the LLM from the most relevant sections.
+
+    The abstract, methodology, architecture and introduction sections are
+    concatenated in that order, each under a "=== NAME ===" banner; empty
+    or missing sections are skipped. The result is cut to `max_chars`
+    characters (8000 by default) to keep the prompt within the LLM's
+    context budget, so later sections are the first to be truncated.
+    """
+    priority = ("abstract", "methodology", "architecture", "introduction")
+    parts = [
+        f"\n=== {name.upper()} ===\n{sections[name]}"
+        for name in priority
+        if sections.get(name)
+    ]
+    return "".join(parts)[:max_chars]

paper2torch-0.1.0/paper2torch/generator.py ADDED Viewed

@@ -0,0 +1,94 @@
+import os
+from groq import Groq
+from dotenv import load_dotenv
+load_dotenv()
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+def _strip_code_fences(text: str) -> str:
+    """Remove a surrounding Markdown code fence (``` or ```lang) if present."""
+    text = text.strip()
+    if not text.startswith("```"):
+        return text
+    # Drop the opening fence line.
+    lines = text.split('\n')[1:]
+    # Drop the closing fence only when it is really there; a reply that was
+    # cut off at max_tokens has no closing fence and its last line is code.
+    if lines and lines[-1].strip().startswith("```"):
+        lines = lines[:-1]
+    return '\n'.join(lines)
+def _ask_llm(prompt: str, max_tokens: int, model: str) -> str:
+    """Send a single-turn user prompt to the Groq client and return the reply text."""
+    response = client.chat.completions.create(
+        model=model,
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=max_tokens
+    )
+    # Treat a missing (None) message content as an empty reply instead of
+    # failing on .strip().
+    return (response.choices[0].message.content or "").strip()
+def generate_pytorch_code(core_content: str, paper_title: str = "Unknown",
+                          model: str = "llama-3.3-70b-versatile") -> dict:
+    """
+    Ask the LLM for an architecture summary, a PyTorch implementation and a
+    config dataclass for the given paper content.
+
+    Args:
+        core_content: paper text used in all three prompts (only the first
+            2000 characters are used for the config prompt).
+        paper_title: title of the paper; accepted for interface
+            compatibility, not used in the prompts.
+        model: chat model name passed to the Groq client.
+
+    Returns:
+        dict with "architecture_info" (the model's JSON reply as text, not
+        parsed), "model_code" and "config_code". Surrounding Markdown code
+        fences are removed from all three.
+    """
+    # Step 1: extract the architecture description.
+    arch_prompt = f"""
+You are an expert ML engineer. Read this research paper content and extract the model architecture.
+Paper content:
+{core_content}
+Return ONLY a JSON with these fields:
+{{
+    "model_name": "name of the model",
+    "architecture_type": "transformer/cnn/rnn/mlp/other",
+    "key_components": ["list", "of", "components"],
+    "hyperparameters": {{"param_name": "value"}},
+    "input_format": "description of input",
+    "output_format": "description of output"
+}}
+Return only JSON, no explanation.
+"""
+    # Fences are stripped here too: the CLI wraps this text in its own
+    # ```json block, so a fenced reply would produce nested fences.
+    arch_text = _strip_code_fences(_ask_llm(arch_prompt, 1000, model))
+    # Step 2: generate the PyTorch code.
+    code_prompt = f"""
+You are an expert PyTorch developer. Based on this research paper content, write a complete PyTorch implementation.
+Paper content:
+{core_content}
+Rules:
+1. Write a complete nn.Module class
+2. Include __init__ and forward methods
+3. Add clear comments explaining each component
+4. Include a config dataclass at the top
+5. Add a simple test at the bottom inside if __name__ == "__main__"
+6. Use only standard PyTorch — no external libraries
+7. Make it runnable as-is
+Return ONLY Python code, no explanation, no markdown backticks.
+"""
+    code_text = _strip_code_fences(_ask_llm(code_prompt, 4000, model))
+    # Step 3: generate the config dataclass.
+    config_prompt = f"""
+Based on this research paper, extract all hyperparameters and write a Python config dataclass.
+Paper content:
+{core_content[:2000]}
+Return ONLY a Python dataclass with all hyperparameters found. Use @dataclass decorator.
+No explanation, no markdown backticks.
+"""
+    config_text = _strip_code_fences(_ask_llm(config_prompt, 1000, model))
+    return {
+        "architecture_info": arch_text,
+        "model_code": code_text,
+        "config_code": config_text
+    }

paper2torch-0.1.0/paper2torch/parser.py ADDED Viewed

@@ -0,0 +1,50 @@
+import fitz
+import os
+def extract_text_from_pdf(pdf_path: str) -> dict:
+    """
+    Read a PDF and return its text, page by page and as one string.
+
+    Args:
+        pdf_path: path to the PDF file.
+
+    Returns:
+        dict with "full_text" (every page's text followed by a newline,
+        concatenated), "pages" (list of {"page": 1-based number,
+        "text": page text}) and "total_pages".
+
+    Raises:
+        FileNotFoundError: if `pdf_path` is not an existing file.
+    """
+    # isfile() rather than exists(): a directory path is not a usable PDF.
+    if not os.path.isfile(pdf_path):
+        raise FileNotFoundError(f"PDF not found: {pdf_path}")
+    pages = []
+    # The context manager closes the document even if text extraction fails.
+    with fitz.open(pdf_path) as doc:
+        for page_num, page in enumerate(doc, start=1):
+            pages.append({
+                "page": page_num,
+                "text": page.get_text()
+            })
+    full_text = "".join(entry["text"] + "\n" for entry in pages)
+    return {
+        "full_text": full_text,
+        "pages": pages,
+        "total_pages": len(pages)
+    }
+def extract_relevant_sections(full_text: str) -> str:
+    """
+    Return the text starting at the first short line that mentions a
+    model-related keyword.
+
+    A line triggers the capture when its stripped form is shorter than 60
+    characters and contains one of the keywords (case-insensitive). That
+    line and every line after it are returned joined by newlines; if no
+    line triggers, the result is an empty string.
+    """
+    trigger_words = (
+        "abstract", "introduction", "method", "methodology",
+        "architecture", "model", "approach", "proposed",
+        "network", "layer", "attention", "encoder", "decoder",
+    )
+    all_lines = full_text.split('\n')
+    def starts_capture(raw_line):
+        # Long lines are never triggers, whatever they contain.
+        if len(raw_line.strip()) >= 60:
+            return False
+        lowered = raw_line.lower().strip()
+        return any(word in lowered for word in trigger_words)
+    first_hit = next(
+        (idx for idx, raw_line in enumerate(all_lines) if starts_capture(raw_line)),
+        None,
+    )
+    if first_hit is None:
+        return ''
+    return '\n'.join(all_lines[first_hit:])

paper2torch-0.1.0/paper2torch/validator.py ADDED Viewed

@@ -0,0 +1,76 @@
+import ast
+import torch
+import sys
+def validate_syntax(code: str) -> dict:
+    """
+    Check that `code` parses as Python.
+
+    Returns {"valid": True, "error": None} on success, otherwise
+    {"valid": False, "error": <message>}.
+    """
+    try:
+        ast.parse(code)
+    except SyntaxError as e:
+        return {
+            "valid": False,
+            "error": f"Syntax error at line {e.lineno}: {e.msg}"
+        }
+    except ValueError as e:
+        # Python versions before 3.12 raise ValueError rather than
+        # SyntaxError for source that contains null bytes.
+        return {
+            "valid": False,
+            "error": f"Invalid source: {e}"
+        }
+    return {
+        "valid": True,
+        "error": None
+    }
+def validate_pytorch(code: str) -> dict:
+    """
+    Check that `code` imports torch and has the basic nn.Module structure.
+
+    The code is inspected through its AST, so `from torch import nn` counts
+    as a torch import and keywords that only appear in comments or strings
+    do not. If the code cannot be parsed, the check falls back to plain
+    substring matching.
+
+    Returns {"valid": bool, "issues": [messages]}; "valid" is True when no
+    issue was found.
+    """
+    try:
+        tree = ast.parse(code)
+    except (SyntaxError, ValueError):
+        tree = None
+    if tree is None:
+        # Unparseable input: best-effort textual checks.
+        has_torch = "import torch" in code
+        has_module = "nn.Module" in code
+        has_forward = "def forward" in code
+        has_init = "def __init__" in code
+    else:
+        has_torch = False
+        has_module = False
+        function_names = set()
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                # `import torch`, `import torch.nn as nn`, ...
+                if any(alias.name.split('.')[0] == "torch" for alias in node.names):
+                    has_torch = True
+            elif isinstance(node, ast.ImportFrom):
+                # `from torch import nn`, `from torch.nn import Module`, ...
+                if (node.module or "").split('.')[0] == "torch":
+                    has_torch = True
+            elif isinstance(node, ast.ClassDef):
+                # A base written as `nn.Module`, `torch.nn.Module` or `Module`.
+                for base in node.bases:
+                    if isinstance(base, ast.Attribute):
+                        base_name = base.attr
+                    else:
+                        base_name = getattr(base, "id", None)
+                    if base_name == "Module":
+                        has_module = True
+            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                function_names.add(node.name)
+        has_forward = "forward" in function_names
+        has_init = "__init__" in function_names
+    issues = []
+    if not has_torch:
+        issues.append("torch not imported")
+    if not has_module:
+        issues.append("No nn.Module class found")
+    if not has_forward:
+        issues.append("No forward method found")
+    if not has_init:
+        issues.append("No __init__ method found")
+    return {
+        "valid": len(issues) == 0,
+        "issues": issues
+    }
+def validate_code(code: str) -> dict:
+    """
+    Full validation — syntax first, then PyTorch structure.
+
+    Returns a dict with "passed" (bool), "syntax" (result of
+    validate_syntax), "pytorch" (result of validate_pytorch, or None when
+    the syntax check failed) and "summary" (human-readable outcome).
+    """
+    syntax_result = validate_syntax(code)
+    if not syntax_result["valid"]:
+        error = str(syntax_result["error"])
+        # validate_syntax already words its message as "Syntax error at
+        # line ..."; only add the prefix when it is not there, so the
+        # summary does not read "Syntax error: Syntax error at line ...".
+        summary = error if error.startswith("Syntax error") else f"Syntax error: {error}"
+        return {
+            "passed": False,
+            "syntax": syntax_result,
+            "pytorch": None,
+            "summary": summary
+        }
+    pytorch_result = validate_pytorch(code)
+    passed = pytorch_result["valid"]
+    summary = "All checks passed!" if passed else f"Issues: {', '.join(pytorch_result['issues'])}"
+    return {
+        "passed": passed,
+        "syntax": syntax_result,
+        "pytorch": pytorch_result,
+        "summary": summary
+    }

paper2torch-0.1.0/paper2torch.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,26 @@
+Metadata-Version: 2.4
+Name: paper2torch
+Version: 0.1.0
+Summary: Convert research papers to PyTorch code using LLMs
+Home-page: https://github.com/karmagodjs/paper2torch
+Author: Dhruv Kumar
+Author-email: rafftarsingh7982@gmail.com
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: groq
+Requires-Dist: pymupdf
+Requires-Dist: click
+Requires-Dist: rich
+Requires-Dist: python-dotenv
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

paper2torch-0.1.0/paper2torch.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,14 @@
+README.md
+setup.py
+paper2torch/__init__.py
+paper2torch/cli.py
+paper2torch/extractor.py
+paper2torch/generator.py
+paper2torch/parser.py
+paper2torch/validator.py
+paper2torch.egg-info/PKG-INFO
+paper2torch.egg-info/SOURCES.txt
+paper2torch.egg-info/dependency_links.txt
+paper2torch.egg-info/entry_points.txt
+paper2torch.egg-info/requires.txt
+paper2torch.egg-info/top_level.txt

paper2torch-0.1.0/paper2torch.egg-info/dependency_links.txt ADDED Viewed

@@ -0,0 +1 @@
+

paper2torch-0.1.0/paper2torch.egg-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+paper2torch = paper2torch.cli:main

paper2torch-0.1.0/paper2torch.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,5 @@
+groq
+pymupdf
+click
+rich
+python-dotenv

paper2torch-0.1.0/paper2torch.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+paper2torch

paper2torch-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

paper2torch-0.1.0/setup.py ADDED Viewed

@@ -0,0 +1,35 @@
+from setuptools import setup, find_packages
+# The README is used as the long description shown on the package index.
+with open("README.md", "r", encoding="utf-8") as f:
+    long_description = f.read()
+setup(
+    name="paper2torch",
+    version="0.1.0",
+    author="Dhruv Kumar",
+    author_email="rafftarsingh7982@gmail.com",
+    description="Convert research papers to PyTorch code using LLMs",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/karmagodjs/paper2torch",
+    packages=find_packages(),
+    install_requires=[
+        "groq",
+        "pymupdf",
+        "click",
+        "rich",
+        "python-dotenv",
+        # paper2torch/validator.py imports torch at module level and the CLI
+        # imports the validator, so the console script cannot start without it.
+        "torch",
+    ],
+    # Installs the `paper2torch` command, which runs paper2torch.cli:main.
+    entry_points={
+        "console_scripts": [
+            "paper2torch=paper2torch.cli:main",
+        ],
+    },
+    python_requires=">=3.8",
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    ],
+)