scientific-writer 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scientific-writer might be problematic. Click here for more details.

@@ -0,0 +1,295 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Scientific Writer CLI Tool
4
+ A command-line interface for scientific writing powered by Claude Code.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import time
10
+ import asyncio
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+ from claude_agent_sdk import query, ClaudeAgentOptions
15
+
16
+ from .core import (
17
+ get_api_key,
18
+ load_system_instructions,
19
+ ensure_output_folder,
20
+ get_data_files,
21
+ process_data_files,
22
+ create_data_context_message,
23
+ )
24
+ from .utils import find_existing_papers, detect_paper_reference
25
+
26
+
27
async def main():
    """Run the interactive Scientific Writer CLI loop.

    Verifies that an API key is configured, prepares the output folder and
    system prompt, prints the welcome banner, and then repeatedly reads a
    prompt from stdin, forwards it to the Claude agent via ``query`` and
    streams the text blocks of the reply to stdout.

    The loop keeps track of the paper directory currently being worked on
    (``current_paper_path``) so that follow-up prompts are prefixed with a
    ``[CONTEXT: ...]`` header, and it moves files found in the project's
    ``data/`` folder into the current paper once one is known.

    The loop ends on ``exit``/``quit`` or when stdin is closed (EOF).
    Exits the process with status 1 if no API key is available.
    """
    # Fail fast when no API key is configured; the key value itself is not used here.
    try:
        get_api_key()
    except ValueError as e:
        print(f"Error: {e}")
        sys.exit(1)

    # Project root: two levels above this file (the parent of the package directory).
    cwd = Path(__file__).parent.parent.absolute()

    # Ensure the paper_outputs folder exists.
    output_folder = ensure_output_folder(cwd)

    # Base system prompt comes from CLAUDE.md (or the built-in fallback).
    system_instructions = load_system_instructions(cwd)

    # Conversation-continuity rules. Session tracking is done by this CLI via
    # current_paper_path; these rules only tell the model how to interpret the
    # [CONTEXT: ...] header that the loop below may prepend.
    system_instructions += (
        "\n\n"
        "\n"
        "IMPORTANT - CONVERSATION CONTINUITY:\n"
        "- The user will provide context in their prompt if they want to continue working on an existing paper\n"
        "- If the prompt includes [CONTEXT: You are currently working on a paper in: ...], continue editing that paper\n"
        "- If no such context is provided, this is a NEW paper request - create a new paper directory\n"
        "- Do NOT assume there's an existing paper unless explicitly told in the prompt context\n"
        "- Each new chat session should start with a new paper unless context says otherwise\n"
    )

    options = ClaudeAgentOptions(
        system_prompt=system_instructions,
        # NOTE(review): the welcome banner advertises "Claude Sonnet 4.5", but this
        # identifier looks like the Claude Sonnet 4 snapshot - confirm which is intended.
        model="claude-sonnet-4-20250514",
        allowed_tools=["Read", "Write", "Edit", "Bash", "research-lookup"],  # Default Claude Code tools + research lookup
        permission_mode="bypassPermissions",  # Execute immediately without approval prompts
        setting_sources=["project"],  # Load skills from .claude/skills/
        cwd=str(cwd),  # Set working directory to project root
    )

    # Directory (as a string) of the paper being edited, or None when no paper is active.
    current_paper_path = None

    _print_welcome_banner(output_folder)

    while True:
        try:
            try:
                user_input = input("\n> ").strip()
            except EOFError:
                # stdin was closed (Ctrl-D / end of piped input). Without this the
                # generic handler below would report the error and loop forever.
                print("\nThank you for using Scientific Writer CLI. Goodbye!")
                break

            command = user_input.lower()

            # Handle special commands
            if command in ["exit", "quit"]:
                print("\nThank you for using Scientific Writer CLI. Goodbye!")
                break

            if command == "help":
                _print_help()
                continue

            if not user_input:
                continue

            # Get all existing papers
            existing_papers = find_existing_papers(output_folder)

            # Check if user wants to start a new paper
            new_paper_keywords = ["new paper", "start fresh", "start afresh", "create new", "different paper", "another paper"]
            is_new_paper_request = any(keyword in command for keyword in new_paper_keywords)

            # Try to detect a reference to an existing paper
            detected_paper_path = None
            if not is_new_paper_request:
                detected_paper_path = detect_paper_reference(user_input, existing_papers)

            if detected_paper_path:
                if str(detected_paper_path) != current_paper_path:
                    # Switch to the paper the user is referring to.
                    current_paper_path = str(detected_paper_path)
                    print(f"\n🔍 Detected reference to existing paper: {detected_paper_path.name}")
                    print(f"📂 Working on: {current_paper_path}\n")
                else:
                    # Already working on the right paper, just confirm
                    print(f"📂 Continuing with: {Path(current_paper_path).name}\n")

            # Move pending data files into the current paper, if there is one.
            data_context = ""
            data_files = get_data_files(cwd)

            if data_files and current_paper_path and not is_new_paper_request:
                print(f"📦 Found {len(data_files)} file(s) in data folder. Processing...")
                processed_info = process_data_files(cwd, data_files, current_paper_path)
                if processed_info:
                    data_context = create_data_context_message(processed_info)
                    _report_copied_files(processed_info)
                    print(" ✓ Deleted original files from data folder\n")
            elif data_files and not current_paper_path:
                # No paper yet: the files are picked up after the query, once a
                # paper directory has been detected.
                print(f"\n📦 Found {len(data_files)} file(s) in data folder.")
                print(" They will be processed once the paper directory is created.\n")

            # Build contextual prompt
            contextual_prompt = user_input

            if current_paper_path and not is_new_paper_request:
                contextual_prompt = (
                    f"[CONTEXT: You are currently working on a paper in: {current_paper_path}]\n"
                    "[INSTRUCTION: Continue editing this existing paper. Do NOT create a new paper directory.]\n"
                    f"{data_context}\n"
                    f"User request: {user_input}"
                )
            elif is_new_paper_request:
                # Reset paper tracking when explicitly starting new
                current_paper_path = None
                print("📝 Starting a new paper...\n")

            # Send query to Claude
            print()  # Add blank line before response
            query_started = time.time()
            async for message in query(prompt=contextual_prompt, options=options):
                # Only messages carrying content blocks are printed; blocks
                # without a `text` attribute are skipped.
                if hasattr(message, "content") and message.content:
                    for block in message.content:
                        if hasattr(block, "text"):
                            print(block.text, end="", flush=True)

            print()  # Add blank line after response

            # A new-paper request has already cleared current_paper_path above, so
            # this single check covers both "no paper yet" and "explicitly new".
            if not current_paper_path:
                try:
                    new_paper_dir = _find_recent_paper_dir(output_folder, query_started)
                    if new_paper_dir is not None:
                        current_paper_path = str(new_paper_dir)
                        print(f"\n📂 Working on: {new_paper_dir.name}")

                        # Process any remaining data files now that we have a paper path
                        remaining_data_files = get_data_files(cwd)
                        if remaining_data_files:
                            print(f"\n📦 Processing {len(remaining_data_files)} data file(s)...")
                            processed_info = process_data_files(cwd, remaining_data_files, current_paper_path)
                            if processed_info:
                                _report_copied_files(processed_info)
                                print(" ✓ Deleted original files from data folder")
                except OSError as e:
                    # Detection is best-effort: report the problem but keep the session alive.
                    print(f"\nWarning: Could not detect the new paper directory: {e}")

        except KeyboardInterrupt:
            print("\n\nInterrupted. Type 'exit' to quit or continue with a new prompt.")
            continue
        except Exception as e:
            # Top-level boundary of the interactive loop: report and keep going.
            print(f"\nError: {str(e)}")
            print("Please try again or type 'exit' to quit.")


def _print_welcome_banner(output_folder):
    """Print the start-up banner, including the resolved output folder."""
    print("=" * 70)
    print("Scientific Writer CLI - Powered by Claude Sonnet 4.5")
    print("=" * 70)
    print("\nWelcome! I'm your scientific writing assistant.")
    print("\nI can help you with:")
    print(" • Writing scientific papers (IMRaD structure)")
    print(" • Literature reviews and citation management")
    print(" • Peer review feedback")
    print(" • Real-time research lookup using Perplexity Sonar Pro")
    print(" • Document manipulation (docx, pdf, pptx, xlsx)")
    print("\n📋 Workflow:")
    print(" 1. I'll present a brief plan and immediately start execution")
    print(" 2. I'll provide continuous updates during the process")
    print(" 3. All outputs saved to: paper_outputs/<timestamp_description>/")
    print(" 4. Progress tracked in real-time in progress.md")
    print(f"\n📁 Output folder: {output_folder}")
    print("\n📦 Data Files:")
    print(" • Place files in the 'data/' folder to include them in your paper")
    print(" • Data files → copied to paper's data/ folder")
    print(" • Images → copied to paper's figures/ folder")
    print(" • Original files are automatically deleted after copying")
    print("\n🤖 Intelligent Paper Detection:")
    print(" • I automatically detect when you're referring to a previous paper")
    print(" • Continue: 'continue', 'update', 'edit', 'the paper', etc.")
    print(" • Search: 'look for', 'find', 'show me', 'where is', etc.")
    print(" • Or reference the paper topic (e.g., 'find the acoustics paper')")
    print(" • Say 'new paper' to explicitly start a fresh paper")
    print("\nType 'exit' or 'quit' to end the session.")
    print("Type 'help' for usage tips.")
    print("=" * 70)
    print()


def _report_copied_files(processed_info):
    """Print how many data files and images were copied into the paper."""
    data_count = len(processed_info['data_files'])
    image_count = len(processed_info['image_files'])
    if data_count > 0:
        print(f" ✓ Copied {data_count} data file(s) to data/")
    if image_count > 0:
        print(f" ✓ Copied {image_count} image(s) to figures/")


def _find_recent_paper_dir(output_folder, query_started):
    """Return the most recently modified paper directory if the last query touched it.

    A directory qualifies when it was modified within the last 10 seconds or at
    any point since ``query_started`` (with 2 seconds of slack for coarse
    filesystem timestamps). Returns None when nothing qualifies.
    """
    stamped_dirs = [(d.stat().st_mtime, d) for d in output_folder.iterdir() if d.is_dir()]
    if not stamped_dirs:
        return None

    newest_mtime, newest_dir = max(stamped_dirs, key=lambda pair: pair[0])
    # A long-running query may create the directory early and then only write
    # into its subfolders, so "modified in the last 10 seconds" alone misses it.
    cutoff = min(query_started - 2.0, time.time() - 10.0)
    return newest_dir if newest_mtime > cutoff else None
226
+
227
+
228
def _print_help():
    """Write the multi-section usage guide for the CLI to stdout."""
    rule = "=" * 70
    help_lines = (
        "\n" + rule,
        "HELP - Scientific Writer CLI",
        rule,
        "\n📝 What I Can Do:",
        " • Create complete scientific papers (LaTeX, Word, Markdown)",
        " • Literature reviews with citation management",
        " • Peer review feedback on drafts",
        " • Real-time research lookup using Perplexity Sonar Pro",
        " • Format citations in any style (APA, IEEE, Nature, etc.)",
        " • Document manipulation (docx, pdf, pptx, xlsx)",
        "\n🔄 How I Work:",
        " 1. You describe what you need",
        " 2. I present a brief plan and start execution immediately",
        " 3. I provide continuous progress updates",
        " 4. All files organized in paper_outputs/ folder",
        "\n💡 Example Requests:",
        " 'Create a NeurIPS paper on transformer attention mechanisms'",
        " 'Write a literature review on CRISPR gene editing'",
        " 'Review my methods section in draft.docx'",
        " 'Research recent advances in quantum computing 2024'",
        " 'Create a Nature paper on climate change impacts'",
        " 'Format 20 citations in IEEE style'",
        "\n📁 File Organization:",
        " All work saved to: paper_outputs/<timestamp>_<description>/",
        " - drafts/ - Working versions",
        " - final/ - Completed documents",
        " - references/ - Bibliography files",
        " - figures/ - Images and charts",
        " - data/ - Data files for the paper",
        " - progress.md - Real-time progress log",
        " - SUMMARY.md - Project summary and instructions",
        "\n📦 Data Files:",
        " Place files in the 'data/' folder at project root:",
        " • Data files (csv, txt, json, etc.) → copied to paper's data/",
        " • Images (png, jpg, svg, etc.) → copied to paper's figures/",
        " • Files are used as context for the paper",
        " • Original files automatically deleted after copying",
        "\n🎯 Pro Tips:",
        " • Be specific about journal/conference (e.g., 'Nature', 'NeurIPS')",
        " • Mention citation style if you have a preference",
        " • I'll make smart defaults if you don't specify details",
        " • Check progress.md for detailed execution logs",
        "\n🔄 Intelligent Paper Detection:",
        " • I automatically detect when you're referring to a previous paper",
        " • Continue working: 'continue the paper', 'update my paper', 'edit the poster'",
        " • Search/find: 'look for the X paper', 'find the paper about Y'",
        " • Or mention the paper topic: 'show me the acoustics paper'",
        " • Keywords like 'continue', 'update', 'edit', 'look for', 'find' trigger detection",
        " • I'll find the most relevant paper based on topic matching",
        " • Say 'new paper' or 'start fresh' to explicitly begin a new one",
        " • Current working paper is tracked throughout the session",
        rule,
    )
    # One print per entry keeps the emitted text identical to a sequence of
    # individual print() calls.
    for text in help_lines:
        print(text)
282
+
283
+
284
def cli_main():
    """Synchronous console entry point: drive ``main()`` on a fresh event loop.

    A Ctrl-C that propagates out of the event loop is reported with a short
    message and turned into a clean exit with status 0.
    """
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n\nExiting...")
        raise SystemExit(0)


# Allow running this module directly as a script.
if __name__ == "__main__":
    cli_main()
295
+
@@ -0,0 +1,219 @@
1
+ """Core utilities for scientific writer."""
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import Optional, List, Dict, Any
7
+ from dotenv import load_dotenv
8
+
9
+ # Load environment variables from .env file if it exists
10
+ load_dotenv()
11
+
12
+
13
def get_api_key(api_key: Optional[str] = None) -> str:
    """
    Resolve the Anthropic API key to use.

    Args:
        api_key: Explicit key. When empty or omitted, the ANTHROPIC_API_KEY
            environment variable is consulted instead.

    Returns:
        The explicit key if given, otherwise the value from the environment.

    Raises:
        ValueError: If neither source yields a non-empty key.
    """
    # The explicit argument wins; the environment is only read as a fallback.
    resolved_key = api_key or os.getenv("ANTHROPIC_API_KEY")
    if resolved_key:
        return resolved_key

    raise ValueError(
        "ANTHROPIC_API_KEY not found. Either pass api_key parameter or set "
        "ANTHROPIC_API_KEY environment variable."
    )
36
+
37
+
38
def load_system_instructions(cwd: Path) -> str:
    """
    Load system instructions from the CLAUDE.md file in the project root.

    Args:
        cwd: Current working directory (project root).

    Returns:
        The UTF-8 decoded contents of ``cwd / "CLAUDE.md"`` when that path is
        a regular file, otherwise a short built-in fallback prompt.
    """
    instructions_file = cwd / "CLAUDE.md"

    # is_file() rather than exists(): a directory that happens to be named
    # CLAUDE.md must fall back instead of failing when opened.
    if instructions_file.is_file():
        return instructions_file.read_text(encoding='utf-8')

    # Fallback if CLAUDE.md doesn't exist
    return (
        "You are a scientific writing assistant. Follow best practices for "
        "scientific communication and always present a plan before execution."
    )
59
+
60
+
61
def ensure_output_folder(cwd: Path, custom_dir: Optional[str] = None) -> Path:
    """
    Create the output folder if needed and return its path.

    Args:
        cwd: Current working directory (project root).
        custom_dir: Optional custom output directory path; resolved to an
            absolute path when given.

    Returns:
        ``custom_dir`` (resolved) when provided, otherwise ``cwd / "paper_outputs"``.
    """
    target = Path(custom_dir).resolve() if custom_dir else cwd / "paper_outputs"
    # Missing parents are created; an already existing folder is not an error.
    target.mkdir(parents=True, exist_ok=True)
    return target
79
+
80
+
81
def get_image_extensions() -> set:
    """Return a fresh set of lowercase, dot-prefixed image file suffixes."""
    return {
        '.png',
        '.jpg',
        '.jpeg',
        '.gif',
        '.bmp',
        '.tiff',
        '.tif',
        '.svg',
        '.webp',
        '.ico',
    }
84
+
85
+
86
def get_data_files(cwd: Path, data_files: Optional[List[str]] = None) -> List[Path]:
    """
    Get data files either from a provided list or from the data folder.

    Args:
        cwd: Current working directory (project root).
        data_files: Optional list of file paths. If not provided (or empty),
            the regular files directly inside ``cwd / "data"`` are used.

    Returns:
        List of Path objects. Explicit paths are resolved and returned in the
        given order; files discovered in the data folder are returned sorted
        by path. Empty when the data folder is missing or is not a directory.
    """
    if data_files:
        return [Path(f).resolve() for f in data_files]

    data_folder = cwd / "data"
    # is_dir() rather than exists(): a plain file named "data" cannot be iterated.
    if not data_folder.is_dir():
        return []

    # Only direct children that are files; sorted because iterdir() yields
    # entries in arbitrary order.
    return sorted(entry for entry in data_folder.iterdir() if entry.is_file())
110
+
111
+
112
def process_data_files(
    cwd: Path,
    data_files: List[Path],
    paper_output_path: str,
    delete_originals: bool = True
) -> Optional[Dict[str, Any]]:
    """
    Process data files by copying them to the paper output folder.
    Images go to figures/, other files go to data/.

    Args:
        cwd: Current working directory (project root). Not used by this
            function; kept for interface compatibility.
        data_files: List of file paths to process.
        paper_output_path: Path to the paper output directory.
        delete_originals: Whether to delete original files after copying.

    Returns:
        None when ``data_files`` is empty. Otherwise a dictionary with three
        lists, containing only files whose copy succeeded:
        ``data_files`` and ``image_files`` (entries with ``name``, ``path``,
        ``original``) and ``all_files`` (entries with ``name``, ``type``,
        ``destination``). A file that fails to copy is reported with a
        warning on stdout and left out of all three lists.
    """
    if not data_files:
        return None

    paper_output = Path(paper_output_path)
    data_output = paper_output / "data"
    figures_output = paper_output / "figures"

    # Ensure output directories exist
    data_output.mkdir(parents=True, exist_ok=True)
    figures_output.mkdir(parents=True, exist_ok=True)

    image_extensions = get_image_extensions()
    processed_info = {
        'data_files': [],
        'image_files': [],
        'all_files': []
    }

    for file_path in data_files:
        file_name = file_path.name

        # Determine destination based on file type (suffix match is case-insensitive)
        if file_path.suffix.lower() in image_extensions:
            file_type, bucket, destination = 'image', 'image_files', figures_output / file_name
        else:
            file_type, bucket, destination = 'data', 'data_files', data_output / file_name

        try:
            shutil.copy2(file_path, destination)
        except OSError as e:
            print(f"Warning: Could not process {file_name}: {str(e)}")
            continue

        # Record the file only now that the copy is known to have succeeded, so
        # callers never count or advertise a file that is not at its destination.
        processed_info[bucket].append({
            'name': file_name,
            'path': str(destination),
            'original': str(file_path)
        })
        processed_info['all_files'].append({
            'name': file_name,
            'type': file_type,
            'destination': str(destination)
        })

        # Delete the original file after successful copy if requested
        if delete_originals:
            try:
                file_path.unlink()
            except OSError as e:
                # The copy is in place; only the cleanup of the source failed.
                print(f"Warning: Could not delete original {file_name}: {str(e)}")

    return processed_info
188
+
189
+
190
def create_data_context_message(processed_info: Optional[Dict[str, Any]]) -> str:
    """
    Render the processed-file summary as a bracketed block for the prompt.

    Args:
        processed_info: Dictionary with ``data_files``, ``image_files`` and
            ``all_files`` lists, or None.

    Returns:
        An empty string when there is nothing to report, otherwise a text
        block delimited by ``[DATA FILES AVAILABLE]`` / ``[END DATA FILES]``
        listing each file's name and destination path.
    """
    if not processed_info or not processed_info['all_files']:
        return ""

    def _listing(entries):
        # One "name: path" line per file entry.
        return [f" - {entry['name']}: {entry['path']}" for entry in entries]

    lines = ["\n[DATA FILES AVAILABLE]"]

    data_entries = processed_info['data_files']
    if data_entries:
        lines += ["\nData files (in data/ folder):", *_listing(data_entries)]

    image_entries = processed_info['image_files']
    if image_entries:
        lines += [
            "\nImage files (in figures/ folder):",
            *_listing(image_entries),
            "\nNote: These images can be referenced as figures in the paper.",
        ]

    lines.append("[END DATA FILES]\n")
    return "\n".join(lines)
219
+
@@ -0,0 +1,76 @@
1
+ """Data models for scientific writer API responses."""
2
+
3
+ from dataclasses import dataclass, field, asdict
4
+ from typing import Optional, List, Dict, Any
5
+ from datetime import datetime
6
+
7
+
8
@dataclass
class ProgressUpdate:
    """A single progress event emitted while a paper is being generated."""

    # Discriminator for consumers of the serialized form.
    type: str = "progress"
    # Creation time in UTC, ISO-8601 with a trailing "Z".
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; switching to an
    # aware datetime needs a `timezone` import and care to keep this exact format.
    timestamp: str = field(default_factory=lambda: f"{datetime.utcnow().isoformat()}Z")
    # Human-readable description of what is happening.
    message: str = ""
    # One of: initialization|research|writing|compilation|complete
    stage: str = "initialization"
    # Completion percentage; starts at 0.
    percentage: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict suitable for JSON serialization."""
        return asdict(self)
20
+
21
+
22
@dataclass
class PaperMetadata:
    """Descriptive information about a generated paper."""

    # Paper title, if known.
    title: Optional[str] = None
    # Creation time in UTC, ISO-8601 with a trailing "Z".
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; switching to an
    # aware datetime needs a `timezone` import and care to keep this exact format.
    created_at: str = field(default_factory=lambda: f"{datetime.utcnow().isoformat()}Z")
    # Topic text the paper was requested for.
    topic: str = ""
    # Word count, if it has been computed.
    word_count: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict suitable for JSON serialization."""
        return asdict(self)
33
+
34
+
35
@dataclass
class PaperFiles:
    """Paths of the artifacts produced for a paper; unset entries stay None or empty."""

    # Final outputs.
    pdf_final: Optional[str] = None
    tex_final: Optional[str] = None
    # Intermediate drafts.
    pdf_drafts: List[str] = field(default_factory=list)
    tex_drafts: List[str] = field(default_factory=list)
    # Supporting material.
    bibliography: Optional[str] = None
    figures: List[str] = field(default_factory=list)
    data: List[str] = field(default_factory=list)
    # Bookkeeping files.
    progress_log: Optional[str] = None
    summary: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict suitable for JSON serialization."""
        return asdict(self)
51
+
52
+
53
@dataclass
class PaperResult:
    """Final result bundling everything known about a generated paper."""

    # Discriminator for consumers of the serialized form.
    type: str = "result"
    # One of: success|partial|failed
    status: str = "success"
    paper_directory: str = ""
    paper_name: str = ""
    # Nested records; each instance gets its own fresh default object.
    metadata: PaperMetadata = field(default_factory=PaperMetadata)
    files: PaperFiles = field(default_factory=PaperFiles)
    citations: Dict[str, Any] = field(default_factory=dict)
    figures_count: int = 0
    compilation_success: bool = False
    errors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the result, including nested records, as plain dicts for JSON."""
        payload = asdict(self)
        # Nested records are serialized through their own to_dict() when they
        # are instances of the expected classes.
        for key, model in (("metadata", PaperMetadata), ("files", PaperFiles)):
            nested = getattr(self, key)
            if isinstance(nested, model):
                payload[key] = nested.to_dict()
        return payload
76
+