scientific-writer 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scientific-writer might be problematic. Click here for more details.
- scientific_writer/__init__.py +43 -0
- scientific_writer/api.py +370 -0
- scientific_writer/cli.py +295 -0
- scientific_writer/core.py +219 -0
- scientific_writer/models.py +76 -0
- scientific_writer/utils.py +289 -0
- scientific_writer-2.0.0.dist-info/METADATA +98 -0
- scientific_writer-2.0.0.dist-info/RECORD +11 -0
- scientific_writer-2.0.0.dist-info/WHEEL +4 -0
- scientific_writer-2.0.0.dist-info/entry_points.txt +2 -0
- scientific_writer-2.0.0.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Scientific Writer - AI-powered scientific writing assistant.
|
|
3
|
+
|
|
4
|
+
A powerful Python package for generating scientific papers, literature reviews,
|
|
5
|
+
and academic documents using Claude Sonnet 4.5.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
Generate a paper programmatically::
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
from scientific_writer import generate_paper
|
|
12
|
+
|
|
13
|
+
async def main():
|
|
14
|
+
async for update in generate_paper("Create a Nature paper on CRISPR"):
|
|
15
|
+
if update["type"] == "progress":
|
|
16
|
+
print(f"[{update['percentage']}%] {update['message']}")
|
|
17
|
+
else:
|
|
18
|
+
print(f"Paper created: {update['paper_directory']}")
|
|
19
|
+
print(f"PDF: {update['files']['pdf_final']}")
|
|
20
|
+
|
|
21
|
+
asyncio.run(main())
|
|
22
|
+
|
|
23
|
+
Use the CLI::
|
|
24
|
+
|
|
25
|
+
$ scientific-writer
|
|
26
|
+
> Create a NeurIPS paper on transformer attention mechanisms
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from .api import generate_paper
|
|
30
|
+
from .models import ProgressUpdate, PaperResult, PaperMetadata, PaperFiles
|
|
31
|
+
|
|
32
|
+
# Package metadata.
__version__ = "2.0.0"
__author__ = "Scientific Writer Contributors"
__license__ = "MIT"

# Names exported by ``from scientific_writer import *``; they are the symbols
# imported from ``.api`` and ``.models`` in this module.
__all__ = ["generate_paper", "ProgressUpdate", "PaperResult", "PaperMetadata", "PaperFiles"]
|
|
43
|
+
|
scientific_writer/api.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
"""Async API for programmatic scientific paper generation."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional, List, Dict, Any, AsyncGenerator, Union
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
from claude_agent_sdk import query, ClaudeAgentOptions
|
|
10
|
+
|
|
11
|
+
from .core import (
|
|
12
|
+
get_api_key,
|
|
13
|
+
load_system_instructions,
|
|
14
|
+
ensure_output_folder,
|
|
15
|
+
get_data_files,
|
|
16
|
+
process_data_files,
|
|
17
|
+
create_data_context_message,
|
|
18
|
+
)
|
|
19
|
+
from .models import ProgressUpdate, PaperResult, PaperMetadata, PaperFiles
|
|
20
|
+
from .utils import (
|
|
21
|
+
scan_paper_directory,
|
|
22
|
+
count_citations_in_bib,
|
|
23
|
+
extract_citation_style,
|
|
24
|
+
count_words_in_tex,
|
|
25
|
+
extract_title_from_tex,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def generate_paper(
    query: str,
    output_dir: Optional[str] = None,
    api_key: Optional[str] = None,
    model: str = "claude-sonnet-4-20250514",
    data_files: Optional[List[str]] = None,
    cwd: Optional[str] = None,
) -> AsyncGenerator[Dict[str, Any], None]:
    """
    Generate a scientific paper asynchronously with progress updates.

    This is a stateless async generator that yields progress updates during
    execution and a final comprehensive result with all paper details.

    Args:
        query: The paper generation request (e.g., "Create a Nature paper on CRISPR")
        output_dir: Optional custom output directory (defaults to cwd/paper_outputs)
        api_key: Optional Anthropic API key (defaults to ANTHROPIC_API_KEY env var)
        model: Claude model to use (default: claude-sonnet-4-20250514)
        data_files: Optional list of data file paths to include
        cwd: Optional working directory (defaults to package parent directory)

    Yields:
        Progress updates (dict with type="progress") during execution
        Final result (dict with type="result") containing all paper information.
        Failures are reported as a yielded result dict built by
        ``_create_error_result`` rather than as a raised exception.

    Example:
        ```python
        async for update in generate_paper("Create a NeurIPS paper on transformers"):
            if update["type"] == "progress":
                print(f"[{update['percentage']}%] {update['message']}")
            else:
                print(f"Paper created: {update['paper_directory']}")
                print(f"PDF: {update['files']['pdf_final']}")
        ```
    """
    # The ``query`` parameter shadows the module-level ``query`` function
    # imported from claude_agent_sdk. Re-import the SDK entry point under a
    # distinct local name; calling ``query(...)`` here would try to call the
    # prompt string and fail with "'str' object is not callable".
    from claude_agent_sdk import query as run_claude_query

    # Reference point used later to pick out directories touched by this run.
    start_time = time.time()

    # get_api_key raises ValueError when no key can be resolved; report that
    # as a failed result instead of raising.
    # NOTE(review): the resolved key is not forwarded to ClaudeAgentOptions
    # below - confirm the SDK obtains it some other way (e.g. the environment).
    try:
        get_api_key(api_key)
    except ValueError as e:
        yield _create_error_result(str(e))
        return

    # Determine working directory
    if cwd:
        work_dir = Path(cwd).resolve()
    else:
        # Default to package parent directory (project root)
        work_dir = Path(__file__).parent.parent.absolute()

    # Ensure output folder exists
    output_folder = ensure_output_folder(work_dir, output_dir)

    # Initial progress update
    yield ProgressUpdate(
        message="Initializing paper generation",
        stage="initialization",
        percentage=0,
    ).to_dict()

    # Load system instructions
    system_instructions = load_system_instructions(work_dir)

    # Add conversation continuity instruction
    system_instructions += "\n\n" + """
IMPORTANT - CONVERSATION CONTINUITY:
- This is a NEW paper request - create a new paper directory
- Create a unique timestamped directory in the paper_outputs folder
- Do NOT assume there's an existing paper unless explicitly told in the prompt context
"""

    # Data files can only be processed once the paper directory exists, so at
    # this point only report how many were found.
    if data_files:
        data_file_paths = get_data_files(work_dir, data_files)
        if data_file_paths:
            yield ProgressUpdate(
                message=f"Found {len(data_file_paths)} data file(s) to process",
                stage="initialization",
                percentage=5,
            ).to_dict()

    # Configure Claude agent options
    options = ClaudeAgentOptions(
        system_prompt=system_instructions,
        model=model,
        allowed_tools=["Read", "Write", "Edit", "Bash", "research-lookup"],
        permission_mode="bypassPermissions",
        setting_sources=["project"],
        cwd=str(work_dir),
    )

    # Track progress through message analysis
    current_stage = "initialization"
    current_percentage = 10

    yield ProgressUpdate(
        message="Starting paper generation with Claude",
        stage="initialization",
        percentage=10,
    ).to_dict()

    # Execute query with Claude
    try:
        accumulated_text = ""
        async for message in run_claude_query(prompt=query, options=options):
            if hasattr(message, "content") and message.content:
                for block in message.content:
                    if hasattr(block, "text"):
                        accumulated_text += block.text

                        # Analyze text for progress indicators
                        stage, percentage, msg = _analyze_progress(
                            accumulated_text, current_stage, current_percentage
                        )

                        # Only emit an update when something actually changed.
                        if stage != current_stage or percentage != current_percentage:
                            current_stage = stage
                            current_percentage = percentage

                            yield ProgressUpdate(
                                message=msg,
                                stage=stage,
                                percentage=percentage,
                            ).to_dict()

        # Paper generation complete - now scan for results
        yield ProgressUpdate(
            message="Scanning paper output directory",
            stage="complete",
            percentage=95,
        ).to_dict()

        # Find the most recently created paper directory
        paper_directory = _find_most_recent_paper(output_folder, start_time)

        if not paper_directory:
            yield _create_error_result("Paper directory not found after generation")
            return

        # Process any data files now that we have a paper directory
        if data_files:
            data_file_paths = get_data_files(work_dir, data_files)
            if data_file_paths:
                processed_info = process_data_files(
                    work_dir,
                    data_file_paths,
                    str(paper_directory),
                    delete_originals=False,  # Don't delete when using programmatic API
                )
                if processed_info:
                    yield ProgressUpdate(
                        message=f"Processed {len(processed_info['all_files'])} data file(s)",
                        stage="complete",
                        percentage=97,
                    ).to_dict()

        # Scan the paper directory for all files
        file_info = scan_paper_directory(paper_directory)

        # Build comprehensive result
        result = _build_paper_result(paper_directory, file_info)

        yield ProgressUpdate(
            message="Paper generation complete",
            stage="complete",
            percentage=100,
        ).to_dict()

        # Final result
        yield result.to_dict()

    except Exception as e:
        # API boundary: any failure during generation or scanning is turned
        # into a failed result dict so consumers see it in the stream.
        yield _create_error_result(f"Error during paper generation: {str(e)}")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _analyze_progress(text: str, current_stage: str, current_percentage: int) -> tuple:
|
|
212
|
+
"""
|
|
213
|
+
Analyze accumulated text to determine current progress stage.
|
|
214
|
+
|
|
215
|
+
Returns:
|
|
216
|
+
Tuple of (stage, percentage, message)
|
|
217
|
+
"""
|
|
218
|
+
text_lower = text.lower()
|
|
219
|
+
|
|
220
|
+
# Check for various progress indicators
|
|
221
|
+
if "research" in text_lower or "literature" in text_lower or "searching" in text_lower:
|
|
222
|
+
if current_stage != "research":
|
|
223
|
+
return "research", 30, "Conducting literature research"
|
|
224
|
+
|
|
225
|
+
if "writing" in text_lower or "introduction" in text_lower or "methods" in text_lower:
|
|
226
|
+
if current_stage != "writing":
|
|
227
|
+
return "writing", 50, "Writing paper sections"
|
|
228
|
+
elif current_percentage < 70:
|
|
229
|
+
return "writing", min(current_percentage + 10, 70), "Writing paper sections"
|
|
230
|
+
|
|
231
|
+
if "compil" in text_lower or "latex" in text_lower or "pdf" in text_lower:
|
|
232
|
+
if current_stage != "compilation":
|
|
233
|
+
return "compilation", 80, "Compiling LaTeX to PDF"
|
|
234
|
+
|
|
235
|
+
if "complete" in text_lower or "finished" in text_lower or "done" in text_lower:
|
|
236
|
+
return "complete", 90, "Finalizing paper"
|
|
237
|
+
|
|
238
|
+
# No change detected
|
|
239
|
+
return current_stage, current_percentage, "Processing..."
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _find_most_recent_paper(output_folder: Path, start_time: float) -> Optional[Path]:
|
|
243
|
+
"""
|
|
244
|
+
Find the most recently created/modified paper directory.
|
|
245
|
+
|
|
246
|
+
Args:
|
|
247
|
+
output_folder: Path to paper_outputs folder
|
|
248
|
+
start_time: Start time of generation (to filter relevant directories)
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
Path to paper directory or None
|
|
252
|
+
"""
|
|
253
|
+
try:
|
|
254
|
+
paper_dirs = [d for d in output_folder.iterdir() if d.is_dir()]
|
|
255
|
+
if not paper_dirs:
|
|
256
|
+
return None
|
|
257
|
+
|
|
258
|
+
# Filter to only directories modified after start_time
|
|
259
|
+
recent_dirs = [
|
|
260
|
+
d for d in paper_dirs
|
|
261
|
+
if d.stat().st_mtime >= start_time - 5 # 5 second buffer
|
|
262
|
+
]
|
|
263
|
+
|
|
264
|
+
if not recent_dirs:
|
|
265
|
+
# Fallback to most recent directory overall
|
|
266
|
+
recent_dirs = paper_dirs
|
|
267
|
+
|
|
268
|
+
# Return the most recent
|
|
269
|
+
most_recent = max(recent_dirs, key=lambda d: d.stat().st_mtime)
|
|
270
|
+
return most_recent
|
|
271
|
+
except Exception:
|
|
272
|
+
return None
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _build_paper_result(paper_dir: Path, file_info: Dict[str, Any]) -> PaperResult:
    """
    Build a comprehensive PaperResult from scanned files.

    Args:
        paper_dir: Path to paper directory
        file_info: Dictionary of file information from scan_paper_directory.
            The keys read here are 'pdf_final', 'tex_final', 'pdf_drafts',
            'tex_drafts', 'bibliography', 'figures', 'data', 'progress_log'
            and 'summary'.

    Returns:
        PaperResult object whose status is "success" when a final PDF is
        present, "partial" when only a final TeX file is present, and
        "failed" otherwise.
    """
    # Function-scope import: the module only imports ``datetime`` itself.
    from datetime import timezone

    # Prefer the final TeX file; otherwise fall back to the first draft.
    tex_file = file_info['tex_final'] or (
        file_info['tex_drafts'][0] if file_info['tex_drafts'] else None
    )

    title = extract_title_from_tex(tex_file)
    word_count = count_words_in_tex(tex_file)

    # Extract topic from directory name: everything after the second
    # underscore, with remaining underscores turned into spaces.
    topic = ""
    parts = paper_dir.name.split('_', 2)
    if len(parts) >= 3:
        topic = parts[2].replace('_', ' ')

    # The trailing "Z" denotes UTC, so convert the timestamp to UTC before
    # formatting instead of labelling a naive local time as UTC.
    # NOTE(review): st_ctime is not a creation time on every platform.
    created_at = (
        datetime.fromtimestamp(paper_dir.stat().st_ctime, tz=timezone.utc)
        .replace(tzinfo=None)
        .isoformat()
        + "Z"
    )

    metadata = PaperMetadata(
        title=title,
        created_at=created_at,
        topic=topic,
        word_count=word_count,
    )

    # Build files object
    files = PaperFiles(
        pdf_final=file_info['pdf_final'],
        tex_final=file_info['tex_final'],
        pdf_drafts=file_info['pdf_drafts'],
        tex_drafts=file_info['tex_drafts'],
        bibliography=file_info['bibliography'],
        figures=file_info['figures'],
        data=file_info['data'],
        progress_log=file_info['progress_log'],
        summary=file_info['summary'],
    )

    # Citations info
    citations = {
        'count': count_citations_in_bib(file_info['bibliography']),
        'style': extract_citation_style(file_info['bibliography']),
        'file': file_info['bibliography'],
    }

    # Determine status
    compilation_success = file_info['pdf_final'] is not None
    if compilation_success:
        status = "success"
    elif file_info['tex_final']:
        status = "partial"  # TeX created but PDF failed
    else:
        status = "failed"

    return PaperResult(
        status=status,
        paper_directory=str(paper_dir),
        paper_name=paper_dir.name,
        metadata=metadata,
        files=files,
        citations=citations,
        figures_count=len(file_info['figures']),
        compilation_success=compilation_success,
        errors=[],
    )
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _create_error_result(error_message: str) -> Dict[str, Any]:
    """
    Wrap an error message in a failed PaperResult and return it as a dict.

    Args:
        error_message: Text stored as the only entry of the result's errors

    Returns:
        ``to_dict()`` output of a PaperResult with status "failed" and empty
        paper directory and paper name
    """
    return PaperResult(
        status="failed",
        paper_directory="",
        paper_name="",
        errors=[error_message],
    ).to_dict()
|
|
370
|
+
|