PyPI - scientific-writer - Versions diffs - 2.0.0__py3-none-any.whl - Mend

scientific-writer 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scientific-writer might be problematic. Click here for more details.

Files changed (11) hide show

scientific_writer/__init__.py +43 -0
scientific_writer/api.py +370 -0
scientific_writer/cli.py +295 -0
scientific_writer/core.py +219 -0
scientific_writer/models.py +76 -0
scientific_writer/utils.py +289 -0
scientific_writer-2.0.0.dist-info/METADATA +98 -0
scientific_writer-2.0.0.dist-info/RECORD +11 -0
scientific_writer-2.0.0.dist-info/WHEEL +4 -0
scientific_writer-2.0.0.dist-info/entry_points.txt +2 -0
scientific_writer-2.0.0.dist-info/licenses/LICENSE +22 -0

scientific_writer/cli.py ADDED Viewed

@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+"""
+Scientific Writer CLI Tool
+A command-line interface for scientific writing powered by Claude Code.
+"""
+import os
+import sys
+import time
+import asyncio
+from pathlib import Path
+from typing import Optional
+from claude_agent_sdk import query, ClaudeAgentOptions
+from .core import (
+    get_api_key,
+    load_system_instructions,
+    ensure_output_folder,
+    get_data_files,
+    process_data_files,
+    create_data_context_message,
+)
+from .utils import find_existing_papers, detect_paper_reference
+async def main():
+    """Main CLI loop for the scientific writer."""
+    # Get API key (verify it exists)
+    try:
+        get_api_key()
+    except ValueError as e:
+        print(f"Error: {e}")
+        sys.exit(1)
+    # Get the current working directory (project root)
+    cwd = Path(__file__).parent.parent.absolute()
+    # Ensure paper_outputs folder exists
+    output_folder = ensure_output_folder(cwd)
+    # Load system instructions from CLAUDE.md
+    system_instructions = load_system_instructions(cwd)
+    # Add conversation continuity instruction
+    # Note: The Python CLI handles session tracking via current_paper_path
+    # These instructions only apply WITHIN a single CLI session, not across different chat sessions
+    system_instructions += "\n\n" + """
+IMPORTANT - CONVERSATION CONTINUITY:
+- The user will provide context in their prompt if they want to continue working on an existing paper
+- If the prompt includes [CONTEXT: You are currently working on a paper in: ...], continue editing that paper
+- If no such context is provided, this is a NEW paper request - create a new paper directory
+- Do NOT assume there's an existing paper unless explicitly told in the prompt context
+- Each new chat session should start with a new paper unless context says otherwise
+"""
+    # Configure the Claude agent options
+    options = ClaudeAgentOptions(
+        system_prompt=system_instructions,
+        model="claude-sonnet-4-20250514",  # Always use Claude Sonnet 4.5
+        allowed_tools=["Read", "Write", "Edit", "Bash", "research-lookup"],  # Default Claude Code tools + research lookup
+        permission_mode="bypassPermissions",  # Execute immediately without approval prompts
+        setting_sources=["project"],  # Load skills from .claude/skills/
+        cwd=str(cwd),  # Set working directory to project root
+    )
+    # Track conversation state
+    current_paper_path = None
+    conversation_history = []
+    # Print welcome message
+    print("=" * 70)
+    print("Scientific Writer CLI - Powered by Claude Sonnet 4.5")
+    print("=" * 70)
+    print("\nWelcome! I'm your scientific writing assistant.")
+    print("\nI can help you with:")
+    print("  • Writing scientific papers (IMRaD structure)")
+    print("  • Literature reviews and citation management")
+    print("  • Peer review feedback")
+    print("  • Real-time research lookup using Perplexity Sonar Pro")
+    print("  • Document manipulation (docx, pdf, pptx, xlsx)")
+    print("\n📋 Workflow:")
+    print("  1. I'll present a brief plan and immediately start execution")
+    print("  2. I'll provide continuous updates during the process")
+    print("  3. All outputs saved to: paper_outputs/<timestamp_description>/")
+    print("  4. Progress tracked in real-time in progress.md")
+    print(f"\n📁 Output folder: {output_folder}")
+    print(f"\n📦 Data Files:")
+    print("  • Place files in the 'data/' folder to include them in your paper")
+    print("  • Data files → copied to paper's data/ folder")
+    print("  • Images → copied to paper's figures/ folder")
+    print("  • Original files are automatically deleted after copying")
+    print("\n🤖 Intelligent Paper Detection:")
+    print("  • I automatically detect when you're referring to a previous paper")
+    print("  • Continue: 'continue', 'update', 'edit', 'the paper', etc.")
+    print("  • Search: 'look for', 'find', 'show me', 'where is', etc.")
+    print("  • Or reference the paper topic (e.g., 'find the acoustics paper')")
+    print("  • Say 'new paper' to explicitly start a fresh paper")
+    print("\nType 'exit' or 'quit' to end the session.")
+    print("Type 'help' for usage tips.")
+    print("=" * 70)
+    print()
+    # Main loop
+    while True:
+        try:
+            # Get user input
+            user_input = input("\n> ").strip()
+            # Handle special commands
+            if user_input.lower() in ["exit", "quit"]:
+                print("\nThank you for using Scientific Writer CLI. Goodbye!")
+                break
+            if user_input.lower() == "help":
+                _print_help()
+                continue
+            if not user_input:
+                continue
+            # Get all existing papers
+            existing_papers = find_existing_papers(output_folder)
+            # Check if user wants to start a new paper
+            new_paper_keywords = ["new paper", "start fresh", "start afresh", "create new", "different paper", "another paper"]
+            is_new_paper_request = any(keyword in user_input.lower() for keyword in new_paper_keywords)
+            # Try to detect reference to existing paper
+            detected_paper_path = None
+            if not is_new_paper_request:
+                detected_paper_path = detect_paper_reference(user_input, existing_papers)
+                # If we detected a paper reference and it's different from current, update it
+                if detected_paper_path and str(detected_paper_path) != current_paper_path:
+                    current_paper_path = str(detected_paper_path)
+                    print(f"\n🔍 Detected reference to existing paper: {detected_paper_path.name}")
+                    print(f"📂 Working on: {current_paper_path}\n")
+                elif detected_paper_path and str(detected_paper_path) == current_paper_path:
+                    # Already working on the right paper, just confirm
+                    print(f"📂 Continuing with: {Path(current_paper_path).name}\n")
+            # Check for data files and process them if we have a current paper
+            data_context = ""
+            data_files = get_data_files(cwd)
+            if data_files and current_paper_path and not is_new_paper_request:
+                print(f"📦 Found {len(data_files)} file(s) in data folder. Processing...")
+                processed_info = process_data_files(cwd, data_files, current_paper_path)
+                if processed_info:
+                    data_context = create_data_context_message(processed_info)
+                    data_count = len(processed_info['data_files'])
+                    image_count = len(processed_info['image_files'])
+                    if data_count > 0:
+                        print(f"   ✓ Copied {data_count} data file(s) to data/")
+                    if image_count > 0:
+                        print(f"   ✓ Copied {image_count} image(s) to figures/")
+                    print("   ✓ Deleted original files from data folder\n")
+            elif data_files and not current_paper_path:
+                # Store data files info for later processing once paper is created
+                print(f"\n📦 Found {len(data_files)} file(s) in data folder.")
+                print("   They will be processed once the paper directory is created.\n")
+            # Build contextual prompt
+            contextual_prompt = user_input
+            # Add context about current paper if one exists and not starting new
+            if current_paper_path and not is_new_paper_request:
+                contextual_prompt = f"""[CONTEXT: You are currently working on a paper in: {current_paper_path}]
+[INSTRUCTION: Continue editing this existing paper. Do NOT create a new paper directory.]
+{data_context}
+User request: {user_input}"""
+            elif is_new_paper_request:
+                # Reset paper tracking when explicitly starting new
+                current_paper_path = None
+                print("📝 Starting a new paper...\n")
+            # Send query to Claude
+            print()  # Add blank line before response
+            async for message in query(prompt=contextual_prompt, options=options):
+                # Handle AssistantMessage with content blocks
+                if hasattr(message, "content") and message.content:
+                    for block in message.content:
+                        if hasattr(block, "text"):
+                            print(block.text, end="", flush=True)
+            print()  # Add blank line after response
+            # Try to detect if a new paper directory was created
+            if not current_paper_path or is_new_paper_request:
+                # Look for the most recently modified directory in paper_outputs
+                # Only update if it was modified in the last 10 seconds (indicating it was just created)
+                try:
+                    paper_dirs = [d for d in output_folder.iterdir() if d.is_dir()]
+                    if paper_dirs:
+                        most_recent = max(paper_dirs, key=lambda d: d.stat().st_mtime)
+                        time_since_modification = time.time() - most_recent.stat().st_mtime
+                        # Only set as current paper if it was modified very recently (within last 10 seconds)
+                        if time_since_modification < 10:
+                            current_paper_path = str(most_recent)
+                            print(f"\n📂 Working on: {most_recent.name}")
+                            # Process any remaining data files now that we have a paper path
+                            remaining_data_files = get_data_files(cwd)
+                            if remaining_data_files:
+                                print(f"\n📦 Processing {len(remaining_data_files)} data file(s)...")
+                                processed_info = process_data_files(cwd, remaining_data_files, current_paper_path)
+                                if processed_info:
+                                    data_count = len(processed_info['data_files'])
+                                    image_count = len(processed_info['image_files'])
+                                    if data_count > 0:
+                                        print(f"   ✓ Copied {data_count} data file(s) to data/")
+                                    if image_count > 0:
+                                        print(f"   ✓ Copied {image_count} image(s) to figures/")
+                                    print("   ✓ Deleted original files from data folder")
+                except Exception:
+                    pass  # Silently fail if we can't detect the directory
+        except KeyboardInterrupt:
+            print("\n\nInterrupted. Type 'exit' to quit or continue with a new prompt.")
+            continue
+        except Exception as e:
+            print(f"\nError: {str(e)}")
+            print("Please try again or type 'exit' to quit.")
+def _print_help():
+    """Print help information."""
+    print("\n" + "=" * 70)
+    print("HELP - Scientific Writer CLI")
+    print("=" * 70)
+    print("\n📝 What I Can Do:")
+    print("  • Create complete scientific papers (LaTeX, Word, Markdown)")
+    print("  • Literature reviews with citation management")
+    print("  • Peer review feedback on drafts")
+    print("  • Real-time research lookup using Perplexity Sonar Pro")
+    print("  • Format citations in any style (APA, IEEE, Nature, etc.)")
+    print("  • Document manipulation (docx, pdf, pptx, xlsx)")
+    print("\n🔄 How I Work:")
+    print("  1. You describe what you need")
+    print("  2. I present a brief plan and start execution immediately")
+    print("  3. I provide continuous progress updates")
+    print("  4. All files organized in paper_outputs/ folder")
+    print("\n💡 Example Requests:")
+    print("  'Create a NeurIPS paper on transformer attention mechanisms'")
+    print("  'Write a literature review on CRISPR gene editing'")
+    print("  'Review my methods section in draft.docx'")
+    print("  'Research recent advances in quantum computing 2024'")
+    print("  'Create a Nature paper on climate change impacts'")
+    print("  'Format 20 citations in IEEE style'")
+    print("\n📁 File Organization:")
+    print("  All work saved to: paper_outputs/<timestamp>_<description>/")
+    print("  - drafts/ - Working versions")
+    print("  - final/ - Completed documents")
+    print("  - references/ - Bibliography files")
+    print("  - figures/ - Images and charts")
+    print("  - data/ - Data files for the paper")
+    print("  - progress.md - Real-time progress log")
+    print("  - SUMMARY.md - Project summary and instructions")
+    print("\n📦 Data Files:")
+    print("  Place files in the 'data/' folder at project root:")
+    print("  • Data files (csv, txt, json, etc.) → copied to paper's data/")
+    print("  • Images (png, jpg, svg, etc.) → copied to paper's figures/")
+    print("  • Files are used as context for the paper")
+    print("  • Original files automatically deleted after copying")
+    print("\n🎯 Pro Tips:")
+    print("  • Be specific about journal/conference (e.g., 'Nature', 'NeurIPS')")
+    print("  • Mention citation style if you have a preference")
+    print("  • I'll make smart defaults if you don't specify details")
+    print("  • Check progress.md for detailed execution logs")
+    print("\n🔄 Intelligent Paper Detection:")
+    print("  • I automatically detect when you're referring to a previous paper")
+    print("  • Continue working: 'continue the paper', 'update my paper', 'edit the poster'")
+    print("  • Search/find: 'look for the X paper', 'find the paper about Y'")
+    print("  • Or mention the paper topic: 'show me the acoustics paper'")
+    print("  • Keywords like 'continue', 'update', 'edit', 'look for', 'find' trigger detection")
+    print("  • I'll find the most relevant paper based on topic matching")
+    print("  • Say 'new paper' or 'start fresh' to explicitly begin a new one")
+    print("  • Current working paper is tracked throughout the session")
+    print("=" * 70)
+def cli_main():
+    """Entry point for the CLI script."""
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        print("\n\nExiting...")
+        sys.exit(0)
+if __name__ == "__main__":
+    cli_main()

scientific_writer/core.py ADDED Viewed

@@ -0,0 +1,219 @@
+"""Core utilities for scientific writer."""
+import os
+import shutil
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+from dotenv import load_dotenv
+# Load environment variables from .env file if it exists
+load_dotenv()
+def get_api_key(api_key: Optional[str] = None) -> str:
+    """
+    Get the Anthropic API key.
+    Args:
+        api_key: Optional API key to use. If not provided, reads from environment.
+    Returns:
+        The API key.
+    Raises:
+        ValueError: If API key is not found.
+    """
+    if api_key:
+        return api_key
+    env_key = os.getenv("ANTHROPIC_API_KEY")
+    if not env_key:
+        raise ValueError(
+            "ANTHROPIC_API_KEY not found. Either pass api_key parameter or set "
+            "ANTHROPIC_API_KEY environment variable."
+        )
+    return env_key
+def load_system_instructions(cwd: Path) -> str:
+    """
+    Load system instructions from CLAUDE.md file.
+    Args:
+        cwd: Current working directory (project root).
+    Returns:
+        System instructions string.
+    """
+    instructions_file = cwd / "CLAUDE.md"
+    if instructions_file.exists():
+        with open(instructions_file, 'r', encoding='utf-8') as f:
+            return f.read()
+    else:
+        # Fallback if CLAUDE.md doesn't exist
+        return (
+            "You are a scientific writing assistant. Follow best practices for "
+            "scientific communication and always present a plan before execution."
+        )
+def ensure_output_folder(cwd: Path, custom_dir: Optional[str] = None) -> Path:
+    """
+    Ensure the paper_outputs folder exists.
+    Args:
+        cwd: Current working directory (project root).
+        custom_dir: Optional custom output directory path.
+    Returns:
+        Path to the output folder.
+    """
+    if custom_dir:
+        output_folder = Path(custom_dir).resolve()
+    else:
+        output_folder = cwd / "paper_outputs"
+    output_folder.mkdir(exist_ok=True, parents=True)
+    return output_folder
+def get_image_extensions() -> set:
+    """Return a set of common image file extensions."""
+    return {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.tif', '.svg', '.webp', '.ico'}
+def get_data_files(cwd: Path, data_files: Optional[List[str]] = None) -> List[Path]:
+    """
+    Get data files either from provided list or from data folder.
+    Args:
+        cwd: Current working directory (project root).
+        data_files: Optional list of file paths. If not provided, reads from data/ folder.
+    Returns:
+        List of Path objects for data files.
+    """
+    if data_files:
+        return [Path(f).resolve() for f in data_files]
+    data_folder = cwd / "data"
+    if not data_folder.exists():
+        return []
+    files = []
+    for file_path in data_folder.iterdir():
+        if file_path.is_file():
+            files.append(file_path)
+    return files
+def process_data_files(
+    cwd: Path,
+    data_files: List[Path],
+    paper_output_path: str,
+    delete_originals: bool = True
+) -> Optional[Dict[str, Any]]:
+    """
+    Process data files by copying them to the paper output folder.
+    Images go to figures/, other files go to data/.
+    Args:
+        cwd: Current working directory (project root).
+        data_files: List of file paths to process.
+        paper_output_path: Path to the paper output directory.
+        delete_originals: Whether to delete original files after copying.
+    Returns:
+        Dictionary with information about processed files, or None if no files.
+    """
+    if not data_files:
+        return None
+    paper_output = Path(paper_output_path)
+    data_output = paper_output / "data"
+    figures_output = paper_output / "figures"
+    # Ensure output directories exist
+    data_output.mkdir(parents=True, exist_ok=True)
+    figures_output.mkdir(parents=True, exist_ok=True)
+    image_extensions = get_image_extensions()
+    processed_info = {
+        'data_files': [],
+        'image_files': [],
+        'all_files': []
+    }
+    for file_path in data_files:
+        file_ext = file_path.suffix.lower()
+        file_name = file_path.name
+        # Determine destination based on file type
+        if file_ext in image_extensions:
+            destination = figures_output / file_name
+            file_type = 'image'
+            processed_info['image_files'].append({
+                'name': file_name,
+                'path': str(destination),
+                'original': str(file_path)
+            })
+        else:
+            destination = data_output / file_name
+            file_type = 'data'
+            processed_info['data_files'].append({
+                'name': file_name,
+                'path': str(destination),
+                'original': str(file_path)
+            })
+        # Copy the file
+        try:
+            shutil.copy2(file_path, destination)
+            processed_info['all_files'].append({
+                'name': file_name,
+                'type': file_type,
+                'destination': str(destination)
+            })
+            # Delete the original file after successful copy if requested
+            if delete_originals:
+                file_path.unlink()
+        except Exception as e:
+            print(f"Warning: Could not process {file_name}: {str(e)}")
+    return processed_info
+def create_data_context_message(processed_info: Optional[Dict[str, Any]]) -> str:
+    """
+    Create a context message about available data files.
+    Args:
+        processed_info: Dictionary with processed file information.
+    Returns:
+        Context message string.
+    """
+    if not processed_info or not processed_info['all_files']:
+        return ""
+    context_parts = ["\n[DATA FILES AVAILABLE]"]
+    if processed_info['data_files']:
+        context_parts.append("\nData files (in data/ folder):")
+        for file_info in processed_info['data_files']:
+            context_parts.append(f"  - {file_info['name']}: {file_info['path']}")
+    if processed_info['image_files']:
+        context_parts.append("\nImage files (in figures/ folder):")
+        for file_info in processed_info['image_files']:
+            context_parts.append(f"  - {file_info['name']}: {file_info['path']}")
+        context_parts.append("\nNote: These images can be referenced as figures in the paper.")
+    context_parts.append("[END DATA FILES]\n")
+    return "\n".join(context_parts)

scientific_writer/models.py ADDED Viewed

@@ -0,0 +1,76 @@
+"""Data models for scientific writer API responses."""
+from dataclasses import dataclass, field, asdict
+from datetime import datetime, timezone
+from typing import Optional, List, Dict, Any
+@dataclass
+class ProgressUpdate:
+    """Progress update during paper generation."""
+    type: str = "progress"
+    timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z")
+    message: str = ""
+    stage: str = "initialization"  # initialization|research|writing|compilation|complete
+    percentage: int = 0
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return asdict(self)
+@dataclass
+class PaperMetadata:
+    """Metadata about the generated paper."""
+    title: Optional[str] = None
+    created_at: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z")
+    topic: str = ""
+    word_count: Optional[int] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return asdict(self)
+@dataclass
+class PaperFiles:
+    """File paths for all generated paper artifacts."""
+    pdf_final: Optional[str] = None
+    tex_final: Optional[str] = None
+    pdf_drafts: List[str] = field(default_factory=list)
+    tex_drafts: List[str] = field(default_factory=list)
+    bibliography: Optional[str] = None
+    figures: List[str] = field(default_factory=list)
+    data: List[str] = field(default_factory=list)
+    progress_log: Optional[str] = None
+    summary: Optional[str] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return asdict(self)
+@dataclass
+class PaperResult:
+    """Final result containing all information about the generated paper."""
+    type: str = "result"
+    status: str = "success"  # success|partial|failed
+    paper_directory: str = ""
+    paper_name: str = ""
+    metadata: PaperMetadata = field(default_factory=PaperMetadata)
+    files: PaperFiles = field(default_factory=PaperFiles)
+    citations: Dict[str, Any] = field(default_factory=dict)
+    figures_count: int = 0
+    compilation_success: bool = False
+    errors: List[str] = field(default_factory=list)
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        result = asdict(self)
+        # Ensure nested objects are also dictionaries
+        if isinstance(self.metadata, PaperMetadata):
+            result['metadata'] = self.metadata.to_dict()
+        if isinstance(self.files, PaperFiles):
+            result['files'] = self.files.to_dict()
+        return result