scientific-writer 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scientific-writer might be problematic. Click here for more details.
- scientific_writer/__init__.py +43 -0
- scientific_writer/api.py +370 -0
- scientific_writer/cli.py +295 -0
- scientific_writer/core.py +219 -0
- scientific_writer/models.py +76 -0
- scientific_writer/utils.py +289 -0
- scientific_writer-2.0.0.dist-info/METADATA +98 -0
- scientific_writer-2.0.0.dist-info/RECORD +11 -0
- scientific_writer-2.0.0.dist-info/WHEEL +4 -0
- scientific_writer-2.0.0.dist-info/entry_points.txt +2 -0
- scientific_writer-2.0.0.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
"""Utility functions for scientific writer."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Dict, Any, Optional
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def find_existing_papers(output_folder: Path) -> List[Dict[str, Any]]:
    """
    Get all existing paper directories with their metadata.

    Args:
        output_folder: Path to the paper outputs folder.

    Returns:
        List of dicts, one per immediate sub-directory, with the keys
        'path' (the directory Path), 'name' (its final path component) and
        'mtime' (``stat().st_mtime`` of the directory). The list is sorted
        by 'mtime', most recent first. An empty list is returned when
        output_folder does not exist or is not a directory.
    """
    # is_dir() instead of exists(): iterdir() raises NotADirectoryError when
    # the path exists but is a regular file.
    if not output_folder.is_dir():
        return []

    papers = [
        {
            'path': paper_dir,
            'name': paper_dir.name,
            'mtime': paper_dir.stat().st_mtime,
        }
        for paper_dir in output_folder.iterdir()
        if paper_dir.is_dir()
    ]

    # Sort by modification time (most recent first)
    papers.sort(key=lambda paper: paper['mtime'], reverse=True)
    return papers
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def detect_paper_reference(user_input: str, existing_papers: List[Dict[str, Any]]) -> Optional[Path]:
    """
    Try to detect if the user is referring to an existing paper.

    The decision is a keyword heuristic applied to the lower-cased input:

    1. A "new paper" phrase anywhere in the input always yields None.
    2. Each paper's topic is taken from its directory name (format:
       YYYYMMDD_HHMMSS_topic); topic words longer than three characters
       that occur in the input are counted.
    3. The first paper (in list order) with two or more topic-word hits is
       returned if the input also contains a search or continuation keyword.
    4. Otherwise, with a search keyword, the paper with the most hits (at
       least one) is returned.
    5. Otherwise, with a continuation keyword, the first paper in
       existing_papers is returned.

    Args:
        user_input: User's input text.
        existing_papers: List of existing paper dictionaries; each needs the
            keys 'name' and 'path'. The first element is used as the
            fallback in step 5 (presumably the most recent paper; the list
            order is decided by the caller).

    Returns:
        The paper path if found, None otherwise.
    """
    if not existing_papers:
        return None

    user_input_lower = user_input.lower()

    # Keywords that suggest continuing with existing work
    continuation_keywords = [
        "continue", "update", "edit", "revise", "modify", "change",
        "add to", "fix", "improve", "review", "the paper", "this paper",
        "my paper", "current paper", "previous paper", "last paper",
        "poster", "the poster", "my poster", "compile", "generate pdf"
    ]

    # Keywords that suggest searching for/looking up an existing paper
    search_keywords = [
        "look for", "find", "search for", "where is", "which paper",
        "show me", "open", "locate", "get"
    ]

    # Keywords that explicitly indicate a new paper
    new_paper_keywords = [
        "new paper", "start fresh", "start afresh", "create new",
        "different paper", "another paper", "write a new"
    ]

    def mentions_whole_keyword(keywords: List[str]) -> bool:
        # Match keywords as whole words/phrases. Plain substring tests made
        # short verbs fire inside unrelated words ("edit" in "gene editing",
        # "get" in "together", "fix" in "prefix"), which routed brand-new
        # requests to an existing paper.
        alternatives = '|'.join(re.escape(keyword) for keyword in keywords)
        return re.search(r'\b(?:' + alternatives + r')\b', user_input_lower) is not None

    # If user explicitly wants a new paper, return None
    if any(keyword in user_input_lower for keyword in new_paper_keywords):
        return None

    # Check if user mentions continuation or search keywords
    has_continuation_keyword = mentions_whole_keyword(continuation_keywords)
    has_search_keyword = mentions_whole_keyword(search_keywords)

    # Try to find paper by name/topic keywords
    best_match = None
    best_match_score = 0

    for paper in existing_papers:
        # Extract topic from directory name (format: YYYYMMDD_HHMMSS_topic)
        parts = paper['name'].lower().split('_', 2)
        if len(parts) < 3:
            continue

        topic_words = parts[2].replace('_', ' ').split()
        # Topic words are matched as substrings on purpose so that simple
        # inflections ("network" vs. "networks") still count as a hit.
        matches = sum(1 for word in topic_words if len(word) > 3 and word in user_input_lower)

        # Keep track of best match
        if matches > best_match_score:
            best_match_score = matches
            best_match = paper['path']

        # If we have a strong match (2+ topic words), return it right away
        if matches >= 2 and (has_search_keyword or has_continuation_keyword):
            return paper['path']

    # If we found any match with search keywords, return the best one
    if has_search_keyword and best_match_score > 0:
        return best_match

    # If user used continuation keywords but no specific match, fall back to
    # the first listed paper (existing_papers is known to be non-empty here).
    if has_continuation_keyword:
        return existing_papers[0]['path']

    return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _files_in(directory: Path) -> List[Path]:
    """Return the regular files directly inside directory, sorted by path ([] if it is not a directory)."""
    # is_dir() instead of exists(): iterdir() raises NotADirectoryError when
    # the expected sub-folder name is taken by a regular file.
    if not directory.is_dir():
        return []
    return sorted(entry for entry in directory.iterdir() if entry.is_file())


def scan_paper_directory(paper_dir: Path) -> Dict[str, Any]:
    """
    Scan a paper directory and collect all file information.

    Looks at the fixed layout below paper_dir: ``final/``, ``drafts/``,
    ``references/references.bib``, ``figures/``, ``data/``, ``progress.md``
    and ``SUMMARY.md``. Only immediate children are inspected and
    sub-directories inside those folders are ignored.

    Args:
        paper_dir: Path to the paper directory.

    Returns:
        Dictionary with comprehensive file information. All paths are
        strings:
            'pdf_final' / 'tex_final': the .pdf / .tex file in final/, or
                None. If several exist, the one that sorts last is kept.
            'pdf_drafts' / 'tex_drafts': sorted .pdf / .tex files in drafts/.
            'bibliography': references/references.bib, or None.
            'figures' / 'data': sorted files in figures/ and data/.
            'progress_log' / 'summary': progress.md / SUMMARY.md, or None.
        Missing folders simply leave their entries at None / [].
    """
    result = {
        'pdf_final': None,
        'tex_final': None,
        'pdf_drafts': [],
        'tex_drafts': [],
        'bibliography': None,
        'figures': [],
        'data': [],
        'progress_log': None,
        'summary': None,
    }

    if not paper_dir.exists():
        return result

    # Scan final/ directory. Iterating in sorted order makes the selected
    # file deterministic when more than one PDF/TeX file is present.
    for file in _files_in(paper_dir / "final"):
        if file.suffix == '.pdf':
            result['pdf_final'] = str(file)
        elif file.suffix == '.tex':
            result['tex_final'] = str(file)

    # Scan drafts/ directory
    for file in _files_in(paper_dir / "drafts"):
        if file.suffix == '.pdf':
            result['pdf_drafts'].append(str(file))
        elif file.suffix == '.tex':
            result['tex_drafts'].append(str(file))

    # Scan references/ directory
    bib_file = paper_dir / "references" / "references.bib"
    if bib_file.is_file():
        result['bibliography'] = str(bib_file)

    # Scan figures/ and data/ directories
    result['figures'] = [str(file) for file in _files_in(paper_dir / "figures")]
    result['data'] = [str(file) for file in _files_in(paper_dir / "data")]

    # Check for progress.md and SUMMARY.md
    progress_file = paper_dir / "progress.md"
    if progress_file.is_file():
        result['progress_log'] = str(progress_file)

    summary_file = paper_dir / "SUMMARY.md"
    if summary_file.is_file():
        result['summary'] = str(summary_file)

    return result
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def count_citations_in_bib(bib_file: Optional[str]) -> int:
    """
    Count the number of citations in a BibTeX file.

    Every ``@type{`` opener is counted except the BibTeX directives
    ``@string``, ``@preamble`` and ``@comment`` (any letter case), which
    define macros/preamble text rather than references.

    Args:
        bib_file: Path to the .bib file.

    Returns:
        Number of citations found. 0 if bib_file is empty/None, does not
        exist, or cannot be read as UTF-8 text.
    """
    if not bib_file or not Path(bib_file).exists():
        return 0

    try:
        with open(bib_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: unreadable path (permissions, is a directory, ...);
        # ValueError covers UnicodeDecodeError for non-UTF-8 content.
        return 0

    non_reference_types = {'string', 'preamble', 'comment'}
    # Capture the entry type of each @article{, @book{, @inproceedings{, ...
    entry_types = re.findall(r'@(\w+)\s*\{', content)
    return sum(1 for entry_type in entry_types if entry_type.lower() not in non_reference_types)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def extract_citation_style(bib_file: Optional[str]) -> str:
    """
    Report the citation style to associate with a bibliography.

    This is a placeholder heuristic: the argument is accepted for interface
    purposes but is not inspected, and the same default is returned for
    every input. In practice the style might be specified in the LaTeX file
    or the progress log instead.

    Args:
        bib_file: Path to the .bib file (currently unused).

    Returns:
        Citation style name (always "BibTeX").
    """
    default_style = "BibTeX"
    return default_style
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def count_words_in_tex(tex_file: Optional[str]) -> Optional[int]:
    """
    Estimate word count in a LaTeX file.

    The estimate is deliberately rough: comments are dropped, every
    ``\\command`` is removed together with its first ``[...]`` / ``{...}``
    argument on the same line, the characters ``{ } $ \\`` are stripped,
    and the remaining whitespace-separated tokens are counted. Text passed
    as a command argument (e.g. section headings) is therefore not counted.

    Args:
        tex_file: Path to the .tex file.

    Returns:
        Estimated word count, or None if tex_file is empty/None, does not
        exist, or cannot be read as UTF-8 text.
    """
    if not tex_file or not Path(tex_file).exists():
        return None

    try:
        with open(tex_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError for non-UTF-8 files.
        return None

    # Remove comments. A '%' only starts a comment when it is not escaped:
    # it must be preceded by an even number of backslashes, so "50\%" is
    # kept as text while "\\% note" (line break + comment) is removed.
    content = re.sub(r'(?<!\\)(?:\\\\)*%.*', '', content)
    # Remove LaTeX commands
    content = re.sub(r'\\[a-zA-Z]+(\[.*?\])?(\{.*?\})?', '', content)
    # Remove special characters
    content = re.sub(r'[{}$\\]', '', content)

    # Count words
    return len(content.split())
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _matching_brace_end(text: str, open_idx: int) -> int:
    """Return the index just past the '}' that closes the '{' at open_idx, or -1 if unbalanced."""
    depth = 0
    pos = open_idx
    while pos < len(text):
        char = text[pos]
        if char == '\\':
            # Skip the escaped character so that \{ and \} are not counted.
            pos += 2
            continue
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return pos + 1
        pos += 1
    return -1


def extract_title_from_tex(tex_file: Optional[str]) -> Optional[str]:
    """
    Extract title from a LaTeX file.

    The first ``\\title{...}`` (optionally ``\\title[short]{...}``) outside
    a comment is located and its brace-balanced argument is turned into
    plain text: ``\\thanks{...}`` / ``\\footnote{...}`` annotations are
    dropped, ``\\\\`` line breaks become spaces, other command names are
    removed while their arguments are kept (``\\textbf{Bold}`` -> ``Bold``),
    grouping braces are stripped and whitespace is collapsed.

    Args:
        tex_file: Path to the .tex file.

    Returns:
        Title string, or None if tex_file is empty/None, the file is missing
        or unreadable as UTF-8, no title is found, or the title is empty
        after clean-up.
    """
    if not tex_file or not Path(tex_file).exists():
        return None

    try:
        with open(tex_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError for non-UTF-8 files.
        return None

    # Drop comments first so a commented-out \title is not picked up; '%'
    # preceded by an odd number of backslashes is an escaped literal.
    content = re.sub(r'(?<!\\)(?:\\\\)*%.*', '', content)

    # Look for \title{...}, allowing the optional short-title argument
    header = re.search(r'\\title\s*(?:\[[^\]]*\])?\s*\{', content)
    if header is None:
        return None

    open_idx = header.end() - 1
    close_end = _matching_brace_end(content, open_idx)
    if close_end == -1:
        return None
    # Scan for the balancing brace instead of stopping at the first '}',
    # otherwise titles containing \textbf{...} etc. are cut short.
    title = content[open_idx + 1:close_end - 1]

    # Remove footnote-style annotations together with their content
    annotation = re.compile(r'\\(?:thanks|footnote)\s*\{')
    note = annotation.search(title)
    while note is not None:
        note_end = _matching_brace_end(title, note.end() - 1)
        if note_end == -1:
            break
        title = title[:note.start()] + title[note_end:]
        note = annotation.search(title)

    # Clean up LaTeX markup in the title, keeping the visible words
    title = re.sub(r'\\\\(?:\s*\[[^\]]*\])?', ' ', title)        # explicit line breaks
    title = re.sub(r'\\[a-zA-Z]+\*?(?:\[[^\]]*\])?', '', title)  # command names only
    title = re.sub(r'(?<!\\)[{}]', '', title)                    # grouping braces
    title = re.sub(r'\\([&%$#_{}])', r'\1', title)               # escaped specials
    title = ' '.join(title.replace('~', ' ').split())

    return title or None
|
|
289
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scientific-writer
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: AI-powered scientific writing with programmatic API and CLI - powered by Claude Sonnet 4.5 and the Claude Agents SDK
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: <=3.12,>=3.10
|
|
8
|
+
Requires-Dist: claude-agent-sdk>=0.1.0
|
|
9
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
10
|
+
Requires-Dist: requests>=2.31.0
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# Claude Scientific Writer
|
|
14
|
+
|
|
15
|
+
A Python package and CLI for generating publication-ready scientific papers with Claude Sonnet. Version 2.0 adds a fully typed, programmatic API while keeping the CLI 100% backward compatible.
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
### Prerequisites
|
|
20
|
+
- Python 3.10–3.12 (the package metadata declares `Requires-Python: <=3.12,>=3.10`)
|
|
21
|
+
- uv (package and environment manager)
|
|
22
|
+
- ANTHROPIC_API_KEY (required), OPENROUTER_API_KEY (optional for research lookup)
|
|
23
|
+
|
|
24
|
+
### Install
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/yourusername/claude-scientific-writer.git
|
|
27
|
+
cd claude-scientific-writer
|
|
28
|
+
uv sync
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Configure API keys
|
|
32
|
+
```bash
|
|
33
|
+
# .env file (recommended)
|
|
34
|
+
echo "ANTHROPIC_API_KEY=your_key" > .env
|
|
35
|
+
echo "OPENROUTER_API_KEY=your_openrouter_key" >> .env
|
|
36
|
+
# or export in your shell
|
|
37
|
+
export ANTHROPIC_API_KEY='your_key'
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Use the CLI
|
|
41
|
+
```bash
|
|
42
|
+
uv run scientific-writer
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Use the Python API
|
|
46
|
+
```python
|
|
47
|
+
import asyncio
|
|
48
|
+
from scientific_writer import generate_paper
|
|
49
|
+
|
|
50
|
+
async def main():
|
|
51
|
+
async for update in generate_paper("Create a Nature paper on CRISPR gene editing"):
|
|
52
|
+
if update["type"] == "progress":
|
|
53
|
+
print(f"[{update['percentage']}%] {update['message']}")
|
|
54
|
+
else:
|
|
55
|
+
print(f"PDF: {update['files']['pdf_final']}")
|
|
56
|
+
|
|
57
|
+
asyncio.run(main())
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Features
|
|
61
|
+
- Scientific writing (IMRaD) with LaTeX and BibTeX outputs
|
|
62
|
+
- Real-time progress streaming and transparent logging
|
|
63
|
+
- Automatic bibliography and citation management
|
|
64
|
+
- Data and figure integration from a local data/ folder
|
|
65
|
+
- Research lookup via OpenRouter (optional)
|
|
66
|
+
- CLI and programmatic API with full type hints
|
|
67
|
+
|
|
68
|
+
## Typical Workflow
|
|
69
|
+
1. Place your figures and data files in data/ at the project root; images are routed to figures/ and all other files to data/ automatically.
|
|
70
|
+
2. Run the CLI (or use the API) and describe what you want (venue, topic, constraints).
|
|
71
|
+
3. Follow progress updates; outputs are saved under paper_outputs/<timestamp>_<topic>/.
|
|
72
|
+
|
|
73
|
+
## Documentation
|
|
74
|
+
- API Reference: Docs/API.md
|
|
75
|
+
- Troubleshooting: Docs/TROUBLESHOOTING.md
|
|
76
|
+
- Skills Overview: Docs/SKILLS.md
|
|
77
|
+
- Development and Contributing: Docs/DEVELOPMENT.md
|
|
78
|
+
- Releasing (versioning & publishing): Docs/RELEASING.md
|
|
79
|
+
- Release Notes: CHANGELOG.md
|
|
80
|
+
- System Instructions (for the agent): CLAUDE.md
|
|
81
|
+
|
|
82
|
+
## Versioning and Publishing (short)
|
|
83
|
+
Use `uv` and the helper scripts:
|
|
84
|
+
- Bump version (keeps pyproject + __init__ in sync): `uv run scripts/bump_version.py [patch|minor|major]`
|
|
85
|
+
- Build and publish: `uv run scripts/publish.py` (or `--bump patch|minor|major`)
|
|
86
|
+
See Docs/RELEASING.md for prerequisites, dry runs, tagging, and verification.
|
|
87
|
+
|
|
88
|
+
## Migration (v1.x -> v2.0)
|
|
89
|
+
- CLI remains unchanged (scientific-writer).
|
|
90
|
+
- New programmatic API: from scientific_writer import generate_paper.
|
|
91
|
+
- Legacy single-file script is replaced by a proper package; no action needed for CLI users.
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
MIT - see LICENSE.
|
|
95
|
+
|
|
96
|
+
## Support
|
|
97
|
+
- Open an issue on GitHub
|
|
98
|
+
- See Docs/TROUBLESHOOTING.md for common problems
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
scientific_writer/__init__.py,sha256=SRUR4yXJ9cTBAUYsit7d4g_4v-8zVPWmh2jCCJRwPMU,1182
|
|
2
|
+
scientific_writer/api.py,sha256=nWzBIvsTfCD_Are1-7kamhjqCHn37zc86js7S3QTdi0,12464
|
|
3
|
+
scientific_writer/cli.py,sha256=Hox01o87h33HSumLVHl70AkNinS-OT6KduSzmwqmx40,14592
|
|
4
|
+
scientific_writer/core.py,sha256=XsdXb-GuKY3ER9VXN1aBdcp4hcJXb4H6-e9wkZjJ1h4,6553
|
|
5
|
+
scientific_writer/models.py,sha256=KjRjMjn4GtbHLIUrx2EZB4omGcZeLbflaTJmfNV1M6Y,2629
|
|
6
|
+
scientific_writer/utils.py,sha256=z2nX3PDEcfW4pN_w47TDDC6Kmdcw5uFUGrT8T6lZYSg,9032
|
|
7
|
+
scientific_writer-2.0.0.dist-info/METADATA,sha256=hh-NsQdiVEOc-92EqmyoUhIr2NdBREx4R5uMQ860ZWc,3237
|
|
8
|
+
scientific_writer-2.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
9
|
+
scientific_writer-2.0.0.dist-info/entry_points.txt,sha256=pI1zUsWVV6eMkNEKfEmkKozOlLRZnhAZfXBsEyqXtqg,69
|
|
10
|
+
scientific_writer-2.0.0.dist-info/licenses/LICENSE,sha256=H6FOLY6X6QMEnqcbDoq5BM0sBf-K-e1SIBAv0zSwxa4,1070
|
|
11
|
+
scientific_writer-2.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 K-Dense Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|