scientific-writer 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scientific-writer might be problematic. Click here for more details.

@@ -0,0 +1,43 @@
1
"""
Scientific Writer - AI-powered scientific writing assistant.

A Python package for generating scientific papers, literature reviews,
and academic documents with Claude models (driven through the Claude Agent
SDK by ``scientific_writer.api``).

The public entry point is :func:`generate_paper`, an async generator that
yields dictionaries: progress updates (``type == "progress"``) followed by a
final result dictionary.

Example:
    Generate a paper programmatically::

        import asyncio
        from scientific_writer import generate_paper

        async def main():
            async for update in generate_paper("Create a Nature paper on CRISPR"):
                if update["type"] == "progress":
                    print(f"[{update['percentage']}%] {update['message']}")
                else:
                    print(f"Paper created: {update['paper_directory']}")
                    print(f"PDF: {update['files']['pdf_final']}")

        asyncio.run(main())

    Use the CLI::

        $ scientific-writer
        > Create a NeurIPS paper on transformer attention mechanisms
"""

# Re-export the programmatic API and its result models at package level.
from .api import generate_paper
from .models import ProgressUpdate, PaperResult, PaperMetadata, PaperFiles

# Package metadata.
__version__ = "2.0.0"
__author__ = "Scientific Writer Contributors"
__license__ = "MIT"

# Names exported by ``from scientific_writer import *``.
__all__ = [
    "generate_paper",
    "ProgressUpdate",
    "PaperResult",
    "PaperMetadata",
    "PaperFiles",
]
43
+
@@ -0,0 +1,370 @@
1
+ """Async API for programmatic scientific paper generation."""
2
+
3
+ import asyncio
4
+ import time
5
+ from pathlib import Path
6
+ from typing import Optional, List, Dict, Any, AsyncGenerator, Union
7
+ from datetime import datetime
8
+
9
+ from claude_agent_sdk import query, ClaudeAgentOptions
10
+
11
+ from .core import (
12
+ get_api_key,
13
+ load_system_instructions,
14
+ ensure_output_folder,
15
+ get_data_files,
16
+ process_data_files,
17
+ create_data_context_message,
18
+ )
19
+ from .models import ProgressUpdate, PaperResult, PaperMetadata, PaperFiles
20
+ from .utils import (
21
+ scan_paper_directory,
22
+ count_citations_in_bib,
23
+ extract_citation_style,
24
+ count_words_in_tex,
25
+ extract_title_from_tex,
26
+ )
27
+
28
+
29
async def generate_paper(
    query: str,
    output_dir: Optional[str] = None,
    api_key: Optional[str] = None,
    model: str = "claude-sonnet-4-20250514",
    data_files: Optional[List[str]] = None,
    cwd: Optional[str] = None,
) -> AsyncGenerator[Dict[str, Any], None]:
    """
    Generate a scientific paper asynchronously with progress updates.

    This is a stateless async generator that yields progress updates during
    execution and a final comprehensive result with all paper details.

    Failures during API-key lookup or during the generation/scan phase are
    not raised; they are reported by yielding a result dictionary built by
    ``_create_error_result`` (status ``"failed"``). Exceptions raised by the
    setup helpers that run before the Claude query starts (output folder
    creation, loading system instructions, data file lookup) are not caught
    here and propagate to the caller.

    Args:
        query: The paper generation request (e.g., "Create a Nature paper on CRISPR")
        output_dir: Optional custom output directory (defaults to cwd/paper_outputs)
        api_key: Optional Anthropic API key (defaults to ANTHROPIC_API_KEY env var)
        model: Claude model to use (default: claude-sonnet-4-20250514)
        data_files: Optional list of data file paths to include
        cwd: Optional working directory (defaults to package parent directory)

    Yields:
        Progress updates (dict with type="progress") during execution
        Final result (dict with type="result") containing all paper information

    Example:
        ```python
        async for update in generate_paper("Create a NeurIPS paper on transformers"):
            if update["type"] == "progress":
                print(f"[{update['percentage']}%] {update['message']}")
            else:
                print(f"Paper created: {update['paper_directory']}")
                print(f"PDF: {update['files']['pdf_final']}")
        ```
    """
    # The ``query`` parameter shadows the module-level SDK ``query`` function
    # inside this body, so calling ``query(...)`` would call a ``str``.
    # Re-import the SDK entry point under a distinct local name instead.
    from claude_agent_sdk import query as _sdk_query

    def progress(message: str, stage: str, percentage: int) -> Dict[str, Any]:
        """Build one progress-update dictionary."""
        return ProgressUpdate(
            message=message,
            stage=stage,
            percentage=percentage,
        ).to_dict()

    start_time = time.time()

    # Validate that an API key is available before doing any work.
    # NOTE(review): the resolved key is not forwarded to ClaudeAgentOptions;
    # presumably the SDK reads it from the environment -- confirm that an
    # explicit ``api_key`` argument actually takes effect.
    try:
        get_api_key(api_key)
    except ValueError as e:
        yield _create_error_result(str(e))
        return

    # Determine working directory
    if cwd:
        work_dir = Path(cwd).resolve()
    else:
        # Default to package parent directory (project root)
        work_dir = Path(__file__).parent.parent.absolute()

    # Ensure output folder exists
    output_folder = ensure_output_folder(work_dir, output_dir)

    yield progress("Initializing paper generation", "initialization", 0)

    # Load system instructions and append the conversation continuity note.
    system_instructions = load_system_instructions(work_dir)
    system_instructions += "\n\n" + """
IMPORTANT - CONVERSATION CONTINUITY:
- This is a NEW paper request - create a new paper directory
- Create a unique timestamped directory in the paper_outputs folder
- Do NOT assume there's an existing paper unless explicitly told in the prompt context
"""

    # Only announce the data files here; they are processed once the paper
    # directory exists (after generation).
    if data_files:
        data_file_paths = get_data_files(work_dir, data_files)
        if data_file_paths:
            yield progress(
                f"Found {len(data_file_paths)} data file(s) to process",
                "initialization",
                5,
            )

    # Configure Claude agent options
    options = ClaudeAgentOptions(
        system_prompt=system_instructions,
        model=model,
        allowed_tools=["Read", "Write", "Edit", "Bash", "research-lookup"],
        permission_mode="bypassPermissions",
        setting_sources=["project"],
        cwd=str(work_dir),
    )

    # Track progress through message analysis
    current_stage = "initialization"
    current_percentage = 10

    yield progress("Starting paper generation with Claude", "initialization", 10)

    # Execute query with Claude
    try:
        accumulated_text = ""
        async for message in _sdk_query(prompt=query, options=options):
            content = getattr(message, "content", None)
            if not content:
                continue
            for block in content:
                if not hasattr(block, "text"):
                    continue
                accumulated_text += block.text

                # Analyze text for progress indicators
                stage, percentage, msg = _analyze_progress(
                    accumulated_text, current_stage, current_percentage
                )

                # Only emit an update when something actually changed.
                if stage != current_stage or percentage != current_percentage:
                    current_stage = stage
                    current_percentage = percentage
                    yield progress(msg, stage, percentage)

        # Paper generation complete - now scan for results
        yield progress("Scanning paper output directory", "complete", 95)

        # Find the most recently created paper directory
        paper_directory = _find_most_recent_paper(output_folder, start_time)

        if not paper_directory:
            yield _create_error_result("Paper directory not found after generation")
            return

        # Process any data files now that we have a paper directory
        if data_files:
            data_file_paths = get_data_files(work_dir, data_files)
            if data_file_paths:
                processed_info = process_data_files(
                    work_dir,
                    data_file_paths,
                    str(paper_directory),
                    delete_originals=False,  # Don't delete when using programmatic API
                )
                if processed_info:
                    yield progress(
                        f"Processed {len(processed_info['all_files'])} data file(s)",
                        "complete",
                        97,
                    )

        # Scan the paper directory for all files and build the final result
        file_info = scan_paper_directory(paper_directory)
        result = _build_paper_result(paper_directory, file_info)

        yield progress("Paper generation complete", "complete", 100)

        # Final result
        yield result.to_dict()

    except Exception as e:
        # API boundary: report any failure as a result dict instead of raising.
        yield _create_error_result(f"Error during paper generation: {str(e)}")
209
+
210
+
211
def _analyze_progress(text: str, current_stage: str, current_percentage: int) -> tuple:
    """
    Analyze accumulated text to determine current progress stage.

    Stage detection is keyword based (case-insensitive substring match).
    Because the caller passes the *accumulated* transcript, keywords of
    earlier stages stay present for the rest of the run; transitions are
    therefore only allowed forward (initialization -> research -> writing ->
    compilation -> complete) so reported progress never moves backwards.

    Args:
        text: Text produced so far.
        current_stage: Stage reported by the previous update.
        current_percentage: Percentage reported by the previous update.

    Returns:
        Tuple of (stage, percentage, message). When nothing changes, the
        current stage and percentage are returned with "Processing...".
    """
    stage_order = {
        "initialization": 0,
        "research": 1,
        "writing": 2,
        "compilation": 3,
        "complete": 4,
    }
    # Unknown stage names rank below every known stage, so any indicator
    # may still move them forward.
    current_rank = stage_order.get(current_stage, -1)
    text_lower = text.lower()

    def mentions(*keywords: str) -> bool:
        """Return True when any keyword occurs in the lower-cased text."""
        return any(keyword in text_lower for keyword in keywords)

    if mentions("research", "literature", "searching"):
        if current_rank < stage_order["research"]:
            return "research", 30, "Conducting literature research"

    if mentions("writing", "introduction", "methods"):
        if current_rank < stage_order["writing"]:
            return "writing", 50, "Writing paper sections"
        if current_stage == "writing" and current_percentage < 70:
            # Advance in 10-point steps while writing, capped at 70.
            return "writing", min(current_percentage + 10, 70), "Writing paper sections"

    if mentions("compil", "latex", "pdf"):
        if current_rank < stage_order["compilation"]:
            return "compilation", 80, "Compiling LaTeX to PDF"

    if mentions("complete", "finished", "done"):
        return "complete", 90, "Finalizing paper"

    # No change detected
    return current_stage, current_percentage, "Processing..."
240
+
241
+
242
def _find_most_recent_paper(
    output_folder: Path,
    start_time: float,
    buffer_seconds: float = 5.0,
) -> Optional[Path]:
    """
    Find the most recently modified paper directory.

    Directories modified at or after ``start_time - buffer_seconds`` are
    preferred. If none qualify, the most recently modified directory overall
    is returned as a fallback.

    Args:
        output_folder: Path to paper_outputs folder
        start_time: Start time of generation (to filter relevant directories)
        buffer_seconds: Tolerance subtracted from ``start_time`` before
            filtering (defaults to the previous hard-coded 5 seconds)

    Returns:
        Path to paper directory, or None when the folder has no
        subdirectories or cannot be read.
    """
    try:
        # Stat every directory exactly once so the filter and the final
        # ordering work from the same timestamps.
        mtimes = {
            entry: entry.stat().st_mtime
            for entry in output_folder.iterdir()
            if entry.is_dir()
        }
    except OSError:
        # Missing/unreadable folder or an entry that vanished mid-scan:
        # best effort, report "not found" rather than failing.
        return None

    if not mtimes:
        return None

    cutoff = start_time - buffer_seconds
    recent = [path for path, mtime in mtimes.items() if mtime >= cutoff]

    # Fallback to every directory when nothing was touched during this run.
    candidates = recent or list(mtimes)
    return max(candidates, key=mtimes.__getitem__)
273
+
274
+
275
def _build_paper_result(paper_dir: Path, file_info: Dict[str, Any]) -> PaperResult:
    """
    Build a comprehensive PaperResult from scanned files.

    Status is "success" when a final PDF exists, "partial" when only the
    final TeX exists, and "failed" otherwise.

    Args:
        paper_dir: Path to paper directory
        file_info: Dictionary of file information from scan_paper_directory

    Returns:
        PaperResult object
    """
    # Local import: the module only imports ``datetime`` from ``datetime``.
    from datetime import timezone

    # Prefer the final TeX file; otherwise use the first listed draft.
    tex_final = file_info['tex_final']
    tex_drafts = file_info['tex_drafts']
    tex_file = tex_final or (tex_drafts[0] if tex_drafts else None)

    title = extract_title_from_tex(tex_file)
    word_count = count_words_in_tex(tex_file)

    # Extract topic from directory name: everything after the second
    # underscore, with remaining underscores turned into spaces.
    topic = ""
    parts = paper_dir.name.split('_', 2)
    if len(parts) >= 3:
        topic = parts[2].replace('_', ' ')

    # The "Z" suffix denotes UTC, so convert the timestamp to UTC explicitly
    # rather than appending "Z" to a naive local time.
    # NOTE(review): st_ctime is creation time on Windows but metadata-change
    # time on Unix -- confirm this is the intended "created at" source.
    created_at = (
        datetime.fromtimestamp(paper_dir.stat().st_ctime, tz=timezone.utc)
        .isoformat()
        .replace("+00:00", "Z")
    )

    metadata = PaperMetadata(
        title=title,
        created_at=created_at,
        topic=topic,
        word_count=word_count,
    )

    # Build files object
    files = PaperFiles(
        pdf_final=file_info['pdf_final'],
        tex_final=file_info['tex_final'],
        pdf_drafts=file_info['pdf_drafts'],
        tex_drafts=file_info['tex_drafts'],
        bibliography=file_info['bibliography'],
        figures=file_info['figures'],
        data=file_info['data'],
        progress_log=file_info['progress_log'],
        summary=file_info['summary'],
    )

    # Citations info
    bibliography = file_info['bibliography']
    citations = {
        'count': count_citations_in_bib(bibliography),
        'style': extract_citation_style(bibliography),
        'file': bibliography,
    }

    # Determine status
    compilation_success = file_info['pdf_final'] is not None
    if compilation_success:
        status = "success"
    elif file_info['tex_final']:
        status = "partial"  # TeX created but PDF failed
    else:
        status = "failed"

    return PaperResult(
        status=status,
        paper_directory=str(paper_dir),
        paper_name=paper_dir.name,
        metadata=metadata,
        files=files,
        citations=citations,
        figures_count=len(file_info['figures']),
        compilation_success=compilation_success,
        errors=[],
    )
351
+
352
+
353
def _create_error_result(error_message: str) -> Dict[str, Any]:
    """
    Produce the dictionary form of a failed paper result.

    Args:
        error_message: Human-readable description of what went wrong

    Returns:
        ``PaperResult.to_dict()`` output for a result with status "failed",
        empty directory/name fields and ``error_message`` as its only error
    """
    failure = PaperResult(
        errors=[error_message],
        paper_name="",
        paper_directory="",
        status="failed",
    )
    return failure.to_dict()
370
+