scitex 2.11.0__py3-none-any.whl → 2.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__main__.py +24 -5
- scitex/__version__.py +1 -1
- scitex/_optional_deps.py +33 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +1 -1
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +2 -2
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +2 -2
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +2 -2
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +2 -2
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +2 -2
- scitex/ai/classification/timeseries/_normalize_timestamp.py +1 -1
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +1 -1
- scitex/ai/plt/_plot_feature_importance.py +1 -1
- scitex/ai/plt/_plot_learning_curve.py +1 -1
- scitex/ai/plt/_plot_optuna_study.py +1 -1
- scitex/ai/plt/_plot_pre_rec_curve.py +1 -1
- scitex/ai/plt/_plot_roc_curve.py +1 -1
- scitex/ai/plt/_stx_conf_mat.py +1 -1
- scitex/ai/training/_LearningCurveLogger.py +1 -1
- scitex/audio/mcp_server.py +38 -8
- scitex/browser/automation/CookieHandler.py +1 -1
- scitex/browser/core/BrowserMixin.py +1 -1
- scitex/browser/core/ChromeProfileManager.py +1 -1
- scitex/browser/debugging/_browser_logger.py +1 -1
- scitex/browser/debugging/_highlight_element.py +1 -1
- scitex/browser/debugging/_show_grid.py +1 -1
- scitex/browser/interaction/click_center.py +1 -1
- scitex/browser/interaction/click_with_fallbacks.py +1 -1
- scitex/browser/interaction/close_popups.py +1 -1
- scitex/browser/interaction/fill_with_fallbacks.py +1 -1
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +1 -1
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +1 -1
- scitex/browser/stealth/HumanBehavior.py +1 -1
- scitex/browser/stealth/StealthManager.py +1 -1
- scitex/canvas/_mcp_handlers.py +372 -0
- scitex/canvas/_mcp_tool_schemas.py +219 -0
- scitex/canvas/mcp_server.py +151 -0
- scitex/capture/mcp_server.py +41 -12
- scitex/cli/audio.py +233 -0
- scitex/cli/capture.py +307 -0
- scitex/cli/main.py +27 -4
- scitex/cli/repro.py +233 -0
- scitex/cli/resource.py +240 -0
- scitex/cli/stats.py +325 -0
- scitex/cli/template.py +236 -0
- scitex/cli/tex.py +286 -0
- scitex/cli/web.py +11 -12
- scitex/dev/__init__.py +3 -0
- scitex/dev/_pyproject.py +405 -0
- scitex/dev/plt/__init__.py +2 -2
- scitex/dev/plt/mpl/get_dir_ax.py +1 -1
- scitex/dev/plt/mpl/get_signatures.py +1 -1
- scitex/dev/plt/mpl/get_signatures_details.py +1 -1
- scitex/diagram/_mcp_handlers.py +400 -0
- scitex/diagram/_mcp_tool_schemas.py +157 -0
- scitex/diagram/mcp_server.py +151 -0
- scitex/dsp/_demo_sig.py +51 -5
- scitex/dsp/_mne.py +13 -2
- scitex/dsp/_modulation_index.py +15 -3
- scitex/dsp/_pac.py +23 -5
- scitex/dsp/_psd.py +16 -4
- scitex/dsp/_resample.py +24 -4
- scitex/dsp/_transform.py +16 -3
- scitex/dsp/add_noise.py +15 -1
- scitex/dsp/norm.py +17 -2
- scitex/dsp/reference.py +17 -1
- scitex/dsp/utils/_differential_bandpass_filters.py +20 -2
- scitex/dsp/utils/_zero_pad.py +18 -4
- scitex/dt/_normalize_timestamp.py +1 -1
- scitex/git/_session.py +1 -1
- scitex/io/_load_modules/_con.py +12 -1
- scitex/io/_load_modules/_eeg.py +12 -1
- scitex/io/_load_modules/_optuna.py +21 -63
- scitex/io/_load_modules/_torch.py +11 -3
- scitex/io/_save_modules/_optuna_study_as_csv_and_pngs.py +13 -2
- scitex/io/_save_modules/_torch.py +11 -3
- scitex/mcp_server.py +159 -0
- scitex/plt/_mcp_handlers.py +361 -0
- scitex/plt/_mcp_tool_schemas.py +169 -0
- scitex/plt/mcp_server.py +205 -0
- scitex/repro/README_RandomStateManager.md +3 -3
- scitex/repro/_RandomStateManager.py +14 -14
- scitex/repro/_gen_ID.py +1 -1
- scitex/repro/_gen_timestamp.py +1 -1
- scitex/repro/_hash_array.py +4 -4
- scitex/scholar/__main__.py +24 -2
- scitex/scholar/_mcp_handlers.py +685 -0
- scitex/scholar/_mcp_tool_schemas.py +339 -0
- scitex/scholar/docs/template.py +1 -1
- scitex/scholar/examples/07_storage_integration.py +1 -1
- scitex/scholar/impact_factor/jcr/ImpactFactorJCREngine.py +1 -1
- scitex/scholar/impact_factor/jcr/build_database.py +1 -1
- scitex/scholar/mcp_server.py +315 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +1 -1
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +1 -1
- scitex/scholar/pipelines/ScholarPipelineParallel.py +1 -1
- scitex/scholar/pipelines/ScholarPipelineSingle.py +1 -1
- scitex/scholar/storage/PaperIO.py +1 -1
- scitex/session/README.md +4 -4
- scitex/session/__init__.py +1 -1
- scitex/session/_decorator.py +9 -9
- scitex/session/_lifecycle.py +5 -5
- scitex/session/template.py +1 -1
- scitex/stats/__main__.py +281 -0
- scitex/stats/_mcp_handlers.py +1191 -0
- scitex/stats/_mcp_tool_schemas.py +384 -0
- scitex/stats/correct/_correct_bonferroni.py +1 -1
- scitex/stats/correct/_correct_fdr.py +1 -1
- scitex/stats/correct/_correct_fdr_.py +1 -1
- scitex/stats/correct/_correct_holm.py +1 -1
- scitex/stats/correct/_correct_sidak.py +1 -1
- scitex/stats/effect_sizes/_cliffs_delta.py +1 -1
- scitex/stats/effect_sizes/_cohens_d.py +1 -1
- scitex/stats/effect_sizes/_epsilon_squared.py +1 -1
- scitex/stats/effect_sizes/_eta_squared.py +1 -1
- scitex/stats/effect_sizes/_prob_superiority.py +1 -1
- scitex/stats/mcp_server.py +405 -0
- scitex/stats/posthoc/_dunnett.py +1 -1
- scitex/stats/posthoc/_games_howell.py +1 -1
- scitex/stats/posthoc/_tukey_hsd.py +1 -1
- scitex/stats/power/_power.py +1 -1
- scitex/stats/utils/_effect_size.py +1 -1
- scitex/stats/utils/_formatters.py +1 -1
- scitex/stats/utils/_power.py +1 -1
- scitex/template/_mcp_handlers.py +259 -0
- scitex/template/_mcp_tool_schemas.py +112 -0
- scitex/template/mcp_server.py +186 -0
- scitex/utils/_verify_scitex_format.py +2 -2
- scitex/utils/template.py +1 -1
- scitex/web/__init__.py +12 -11
- scitex/web/_scraping.py +26 -265
- scitex/web/download_images.py +316 -0
- scitex/writer/Writer.py +1 -1
- scitex/writer/_clone_writer_project.py +1 -1
- scitex/writer/_validate_tree_structures.py +1 -1
- scitex/writer/dataclasses/config/_WriterConfig.py +1 -1
- scitex/writer/dataclasses/contents/_ManuscriptContents.py +1 -1
- scitex/writer/dataclasses/core/_Document.py +1 -1
- scitex/writer/dataclasses/core/_DocumentSection.py +1 -1
- scitex/writer/dataclasses/results/_CompilationResult.py +1 -1
- scitex/writer/dataclasses/results/_LaTeXIssue.py +1 -1
- scitex/writer/utils/.legacy_git_retry.py +7 -5
- scitex/writer/utils/_parse_latex_logs.py +1 -1
- {scitex-2.11.0.dist-info → scitex-2.13.0.dist-info}/METADATA +431 -269
- {scitex-2.11.0.dist-info → scitex-2.13.0.dist-info}/RECORD +147 -118
- scitex-2.13.0.dist-info/entry_points.txt +11 -0
- scitex-2.11.0.dist-info/entry_points.txt +0 -2
- {scitex-2.11.0.dist-info → scitex-2.13.0.dist-info}/WHEEL +0 -0
- {scitex-2.11.0.dist-info → scitex-2.13.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Timestamp: 2026-01-08
|
|
3
|
+
# File: src/scitex/scholar/_mcp_handlers.py
|
|
4
|
+
# ----------------------------------------
|
|
5
|
+
|
|
6
|
+
"""Handler implementations for the scitex-scholar MCP server."""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"search_papers_handler",
|
|
18
|
+
"resolve_dois_handler",
|
|
19
|
+
"enrich_bibtex_handler",
|
|
20
|
+
"download_pdf_handler",
|
|
21
|
+
"download_pdfs_batch_handler",
|
|
22
|
+
"get_library_status_handler",
|
|
23
|
+
"parse_bibtex_handler",
|
|
24
|
+
"validate_pdfs_handler",
|
|
25
|
+
"resolve_openurls_handler",
|
|
26
|
+
"authenticate_handler",
|
|
27
|
+
"export_papers_handler",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _get_scholar_dir() -> Path:
|
|
32
|
+
"""Get the scholar data directory."""
|
|
33
|
+
base_dir = Path(os.getenv("SCITEX_DIR", Path.home() / ".scitex"))
|
|
34
|
+
scholar_dir = base_dir / "scholar"
|
|
35
|
+
scholar_dir.mkdir(parents=True, exist_ok=True)
|
|
36
|
+
return scholar_dir
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _ensure_scholar():
|
|
40
|
+
"""Ensure Scholar module is available and return instance."""
|
|
41
|
+
try:
|
|
42
|
+
from scitex.scholar import Scholar
|
|
43
|
+
|
|
44
|
+
return Scholar()
|
|
45
|
+
except ImportError as e:
|
|
46
|
+
raise RuntimeError(f"Scholar module not available: {e}")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def search_papers_handler(
|
|
50
|
+
query: str,
|
|
51
|
+
sources: list[str] | None = None,
|
|
52
|
+
limit: int = 20,
|
|
53
|
+
year_min: int | None = None,
|
|
54
|
+
year_max: int | None = None,
|
|
55
|
+
) -> dict:
|
|
56
|
+
"""Search for scientific papers across multiple databases."""
|
|
57
|
+
try:
|
|
58
|
+
from scitex.scholar import Scholar
|
|
59
|
+
|
|
60
|
+
loop = asyncio.get_event_loop()
|
|
61
|
+
scholar = Scholar()
|
|
62
|
+
|
|
63
|
+
def do_search():
|
|
64
|
+
kwargs = {"limit": limit}
|
|
65
|
+
if sources:
|
|
66
|
+
kwargs["sources"] = sources
|
|
67
|
+
if year_min:
|
|
68
|
+
kwargs["year_min"] = year_min
|
|
69
|
+
if year_max:
|
|
70
|
+
kwargs["year_max"] = year_max
|
|
71
|
+
|
|
72
|
+
papers = scholar.search(query, **kwargs)
|
|
73
|
+
return papers
|
|
74
|
+
|
|
75
|
+
papers = await loop.run_in_executor(None, do_search)
|
|
76
|
+
|
|
77
|
+
results = []
|
|
78
|
+
for paper in papers:
|
|
79
|
+
results.append(
|
|
80
|
+
{
|
|
81
|
+
"title": paper.title,
|
|
82
|
+
"authors": paper.authors[:5] if paper.authors else [],
|
|
83
|
+
"year": paper.year,
|
|
84
|
+
"doi": paper.doi,
|
|
85
|
+
"journal": paper.journal,
|
|
86
|
+
"abstract": (
|
|
87
|
+
paper.abstract[:300] + "..."
|
|
88
|
+
if paper.abstract and len(paper.abstract) > 300
|
|
89
|
+
else paper.abstract
|
|
90
|
+
),
|
|
91
|
+
"citation_count": paper.citation_count,
|
|
92
|
+
"impact_factor": paper.impact_factor,
|
|
93
|
+
}
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
return {
|
|
97
|
+
"success": True,
|
|
98
|
+
"count": len(results),
|
|
99
|
+
"query": query,
|
|
100
|
+
"papers": results,
|
|
101
|
+
"timestamp": datetime.now().isoformat(),
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
except Exception as e:
|
|
105
|
+
return {"success": False, "error": str(e)}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
async def resolve_dois_handler(
|
|
109
|
+
bibtex_path: str | None = None,
|
|
110
|
+
titles: list[str] | None = None,
|
|
111
|
+
resume: bool = True,
|
|
112
|
+
project: str | None = None,
|
|
113
|
+
) -> dict:
|
|
114
|
+
"""Resolve DOIs from paper titles."""
|
|
115
|
+
try:
|
|
116
|
+
from scitex.scholar import Scholar
|
|
117
|
+
|
|
118
|
+
loop = asyncio.get_event_loop()
|
|
119
|
+
scholar = Scholar(project=project) if project else Scholar()
|
|
120
|
+
|
|
121
|
+
def do_resolve():
|
|
122
|
+
if bibtex_path:
|
|
123
|
+
# Load papers from BibTeX and resolve DOIs
|
|
124
|
+
papers = scholar.from_bibtex(bibtex_path)
|
|
125
|
+
resolved = []
|
|
126
|
+
failed = []
|
|
127
|
+
|
|
128
|
+
for paper in papers:
|
|
129
|
+
if not paper.doi:
|
|
130
|
+
# Try to resolve DOI from title
|
|
131
|
+
try:
|
|
132
|
+
doi = scholar.resolve_doi(
|
|
133
|
+
paper.title, paper.authors, paper.year
|
|
134
|
+
)
|
|
135
|
+
if doi:
|
|
136
|
+
paper.doi = doi
|
|
137
|
+
resolved.append({"title": paper.title, "doi": doi})
|
|
138
|
+
else:
|
|
139
|
+
failed.append(
|
|
140
|
+
{"title": paper.title, "reason": "No DOI found"}
|
|
141
|
+
)
|
|
142
|
+
except Exception as e:
|
|
143
|
+
failed.append({"title": paper.title, "reason": str(e)})
|
|
144
|
+
else:
|
|
145
|
+
resolved.append({"title": paper.title, "doi": paper.doi})
|
|
146
|
+
|
|
147
|
+
return {"resolved": resolved, "failed": failed, "total": len(papers)}
|
|
148
|
+
|
|
149
|
+
elif titles:
|
|
150
|
+
resolved = []
|
|
151
|
+
failed = []
|
|
152
|
+
|
|
153
|
+
for title in titles:
|
|
154
|
+
try:
|
|
155
|
+
doi = scholar.resolve_doi(title)
|
|
156
|
+
if doi:
|
|
157
|
+
resolved.append({"title": title, "doi": doi})
|
|
158
|
+
else:
|
|
159
|
+
failed.append({"title": title, "reason": "No DOI found"})
|
|
160
|
+
except Exception as e:
|
|
161
|
+
failed.append({"title": title, "reason": str(e)})
|
|
162
|
+
|
|
163
|
+
return {"resolved": resolved, "failed": failed, "total": len(titles)}
|
|
164
|
+
|
|
165
|
+
else:
|
|
166
|
+
return {"error": "Either bibtex_path or titles required"}
|
|
167
|
+
|
|
168
|
+
result = await loop.run_in_executor(None, do_resolve)
|
|
169
|
+
|
|
170
|
+
return {
|
|
171
|
+
"success": True,
|
|
172
|
+
**result,
|
|
173
|
+
"timestamp": datetime.now().isoformat(),
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
except Exception as e:
|
|
177
|
+
return {"success": False, "error": str(e)}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
async def enrich_bibtex_handler(
|
|
181
|
+
bibtex_path: str,
|
|
182
|
+
output_path: str | None = None,
|
|
183
|
+
add_abstracts: bool = True,
|
|
184
|
+
add_citations: bool = True,
|
|
185
|
+
add_impact_factors: bool = True,
|
|
186
|
+
) -> dict:
|
|
187
|
+
"""Enrich BibTeX entries with metadata."""
|
|
188
|
+
try:
|
|
189
|
+
from scitex.scholar import Scholar
|
|
190
|
+
|
|
191
|
+
loop = asyncio.get_event_loop()
|
|
192
|
+
scholar = Scholar()
|
|
193
|
+
|
|
194
|
+
def do_enrich():
|
|
195
|
+
papers = scholar.from_bibtex(bibtex_path)
|
|
196
|
+
enriched_count = 0
|
|
197
|
+
|
|
198
|
+
for paper in papers:
|
|
199
|
+
enriched = False
|
|
200
|
+
|
|
201
|
+
if add_abstracts and not paper.abstract:
|
|
202
|
+
# Try to fetch abstract
|
|
203
|
+
pass # Will be handled by scholar.enrich()
|
|
204
|
+
|
|
205
|
+
if add_citations and not paper.citation_count:
|
|
206
|
+
pass
|
|
207
|
+
|
|
208
|
+
if add_impact_factors and not paper.impact_factor:
|
|
209
|
+
pass
|
|
210
|
+
|
|
211
|
+
if enriched:
|
|
212
|
+
enriched_count += 1
|
|
213
|
+
|
|
214
|
+
# Use scholar's enrich method
|
|
215
|
+
papers = scholar.enrich(papers)
|
|
216
|
+
|
|
217
|
+
# Save to output
|
|
218
|
+
out_path = output_path or bibtex_path.replace(".bib", "-enriched.bib")
|
|
219
|
+
papers.save(out_path)
|
|
220
|
+
|
|
221
|
+
summary = {
|
|
222
|
+
"total": len(papers),
|
|
223
|
+
"with_doi": sum(1 for p in papers if p.doi),
|
|
224
|
+
"with_abstract": sum(1 for p in papers if p.abstract),
|
|
225
|
+
"with_citations": sum(1 for p in papers if p.citation_count),
|
|
226
|
+
"with_impact_factor": sum(1 for p in papers if p.impact_factor),
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
return {"output_path": out_path, "summary": summary}
|
|
230
|
+
|
|
231
|
+
result = await loop.run_in_executor(None, do_enrich)
|
|
232
|
+
|
|
233
|
+
return {
|
|
234
|
+
"success": True,
|
|
235
|
+
**result,
|
|
236
|
+
"timestamp": datetime.now().isoformat(),
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
except Exception as e:
|
|
240
|
+
return {"success": False, "error": str(e)}
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
async def download_pdf_handler(
    doi: str,
    output_dir: str = "./pdfs",
    auth_method: str = "none",
    use_browser: bool = False,
) -> dict:
    """Download a single PDF by DOI.

    Args:
        doi: DOI of the paper to download.
        output_dir: Directory to save the PDF into (created if missing).
        auth_method: Accepted for interface compatibility; currently unused.
        use_browser: Whether to use a browser-based download path.

    Returns:
        dict with ``success`` and ``doi``; on success also ``path`` and
        ``timestamp``, otherwise an ``error`` message.
    """
    try:
        from scitex.scholar import Scholar
        from scitex.scholar.core import Paper

        loop = asyncio.get_running_loop()
        scholar = Scholar()

        def do_download():
            paper = Paper(doi=doi)
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            # Blocking network/file work happens inside the executor.
            return scholar.download_pdf(
                paper,
                output_dir=output_dir,
                use_browser=use_browser,
            )

        result = await loop.run_in_executor(None, do_download)

        if result:
            return {
                "success": True,
                "doi": doi,
                "path": str(result),
                "timestamp": datetime.now().isoformat(),
            }
        return {
            "success": False,
            "doi": doi,
            "error": "Download failed",
        }

    except Exception as e:
        return {"success": False, "doi": doi, "error": str(e)}
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
async def download_pdfs_batch_handler(
|
|
290
|
+
dois: list[str] | None = None,
|
|
291
|
+
bibtex_path: str | None = None,
|
|
292
|
+
project: str | None = None,
|
|
293
|
+
output_dir: str | None = None,
|
|
294
|
+
max_concurrent: int = 3,
|
|
295
|
+
resume: bool = True,
|
|
296
|
+
) -> dict:
|
|
297
|
+
"""Download PDFs for multiple papers."""
|
|
298
|
+
try:
|
|
299
|
+
from scitex.scholar import Scholar
|
|
300
|
+
from scitex.scholar.core import Paper
|
|
301
|
+
|
|
302
|
+
loop = asyncio.get_event_loop()
|
|
303
|
+
scholar = Scholar(project=project) if project else Scholar()
|
|
304
|
+
|
|
305
|
+
def do_batch_download():
|
|
306
|
+
papers = []
|
|
307
|
+
|
|
308
|
+
if bibtex_path:
|
|
309
|
+
papers = scholar.from_bibtex(bibtex_path)
|
|
310
|
+
elif dois:
|
|
311
|
+
papers = [Paper(doi=d) for d in dois]
|
|
312
|
+
else:
|
|
313
|
+
return {"error": "Either dois or bibtex_path required"}
|
|
314
|
+
|
|
315
|
+
out_dir = output_dir or str(
|
|
316
|
+
_get_scholar_dir() / "library" / (project or "default") / "pdfs"
|
|
317
|
+
)
|
|
318
|
+
Path(out_dir).mkdir(parents=True, exist_ok=True)
|
|
319
|
+
|
|
320
|
+
results = {
|
|
321
|
+
"total": len(papers),
|
|
322
|
+
"downloaded": [],
|
|
323
|
+
"failed": [],
|
|
324
|
+
"skipped": [],
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
for paper in papers:
|
|
328
|
+
if not paper.doi:
|
|
329
|
+
results["skipped"].append(
|
|
330
|
+
{"title": paper.title, "reason": "No DOI"}
|
|
331
|
+
)
|
|
332
|
+
continue
|
|
333
|
+
|
|
334
|
+
try:
|
|
335
|
+
pdf_path = scholar.download_pdf(paper, output_dir=out_dir)
|
|
336
|
+
if pdf_path:
|
|
337
|
+
results["downloaded"].append(
|
|
338
|
+
{
|
|
339
|
+
"doi": paper.doi,
|
|
340
|
+
"path": str(pdf_path),
|
|
341
|
+
}
|
|
342
|
+
)
|
|
343
|
+
else:
|
|
344
|
+
results["failed"].append(
|
|
345
|
+
{
|
|
346
|
+
"doi": paper.doi,
|
|
347
|
+
"reason": "Download returned None",
|
|
348
|
+
}
|
|
349
|
+
)
|
|
350
|
+
except Exception as e:
|
|
351
|
+
results["failed"].append(
|
|
352
|
+
{
|
|
353
|
+
"doi": paper.doi,
|
|
354
|
+
"reason": str(e),
|
|
355
|
+
}
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
return results
|
|
359
|
+
|
|
360
|
+
result = await loop.run_in_executor(None, do_batch_download)
|
|
361
|
+
|
|
362
|
+
return {
|
|
363
|
+
"success": True,
|
|
364
|
+
**result,
|
|
365
|
+
"timestamp": datetime.now().isoformat(),
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
except Exception as e:
|
|
369
|
+
return {"success": False, "error": str(e)}
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
async def get_library_status_handler(
    project: str | None = None,
    include_details: bool = False,
) -> dict:
    """Report the contents of the scholar library directory.

    Args:
        project: Optional project name; restricts the report to
            ``<scholar-dir>/library/<project>``.
        include_details: When True, also list up to 50 per-entry summaries
            read from each entry's ``metadata.json``.

    Returns:
        dict with ``success`` and ``exists``; when the directory exists,
        also ``path``, ``pdf_count``, ``entry_count``, ``timestamp`` and
        (optionally) ``entries``.
    """
    try:
        library_dir = _get_scholar_dir() / "library"
        project_dir = library_dir / project if project else library_dir

        if not project_dir.exists():
            return {
                "success": True,
                "exists": False,
                "message": f"Library directory not found: {project_dir}",
            }

        # One metadata.json per library entry; PDFs may live anywhere below.
        pdf_files = list(project_dir.rglob("*.pdf"))
        metadata_files = list(project_dir.rglob("metadata.json"))

        status = {
            "success": True,
            "exists": True,
            "path": str(project_dir),
            "pdf_count": len(pdf_files),
            "entry_count": len(metadata_files),
            "timestamp": datetime.now().isoformat(),
        }

        if include_details:
            entries = []
            # Cap at 50 entries to keep the response fast on large libraries.
            for meta_file in metadata_files[:50]:
                try:
                    with open(meta_file) as fh:
                        meta = json.load(fh)
                    # An entry "has a PDF" iff any *.pdf sits next to its
                    # metadata.json.  (The old form joined the parent with
                    # each absolute glob result, which only worked because
                    # joining a path with an absolute path discards the
                    # left operand.)
                    pdf_exists = any(meta_file.parent.glob("*.pdf"))
                    entries.append(
                        {
                            "id": meta_file.parent.name,
                            "title": meta.get("title", "Unknown"),
                            "doi": meta.get("doi"),
                            "has_pdf": pdf_exists,
                        }
                    )
                except Exception:
                    # Best-effort listing: skip unreadable/corrupt metadata.
                    pass

            status["entries"] = entries

        return status

    except Exception as e:
        return {"success": False, "error": str(e)}
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
async def parse_bibtex_handler(bibtex_path: str) -> dict:
    """Parse a BibTeX file into trimmed paper metadata.

    Args:
        bibtex_path: Path to the BibTeX file.

    Returns:
        dict with ``success``; on success ``count``, ``path``, ``papers``
        (title, up to 5 authors, year, doi, journal, bibtex_key) and
        ``timestamp``; on failure an ``error`` message.
    """
    try:
        from scitex.scholar import Scholar

        # get_running_loop() is the supported API inside a coroutine;
        # get_event_loop() is deprecated in this context since Python 3.10.
        loop = asyncio.get_running_loop()
        scholar = Scholar()

        # Parsing touches the filesystem; keep it off the event loop thread.
        papers = await loop.run_in_executor(
            None, lambda: scholar.from_bibtex(bibtex_path)
        )

        results = [
            {
                "title": paper.title,
                "authors": paper.authors[:5] if paper.authors else [],
                "year": paper.year,
                "doi": paper.doi,
                "journal": paper.journal,
                # Not every Paper carries its originating BibTeX key.
                "bibtex_key": getattr(paper, "bibtex_key", None),
            }
            for paper in papers
        ]

        return {
            "success": True,
            "count": len(results),
            "path": bibtex_path,
            "papers": results,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
async def validate_pdfs_handler(
|
|
475
|
+
project: str | None = None,
|
|
476
|
+
pdf_paths: list[str] | None = None,
|
|
477
|
+
) -> dict:
|
|
478
|
+
"""Validate PDF files."""
|
|
479
|
+
try:
|
|
480
|
+
from PyPDF2 import PdfReader
|
|
481
|
+
|
|
482
|
+
if pdf_paths:
|
|
483
|
+
paths = [Path(p) for p in pdf_paths]
|
|
484
|
+
elif project:
|
|
485
|
+
library_dir = _get_scholar_dir() / "library" / project
|
|
486
|
+
paths = list(library_dir.rglob("*.pdf"))
|
|
487
|
+
else:
|
|
488
|
+
library_dir = _get_scholar_dir() / "library"
|
|
489
|
+
paths = list(library_dir.rglob("*.pdf"))
|
|
490
|
+
|
|
491
|
+
results = {
|
|
492
|
+
"total": len(paths),
|
|
493
|
+
"valid": [],
|
|
494
|
+
"invalid": [],
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
for pdf_path in paths:
|
|
498
|
+
try:
|
|
499
|
+
reader = PdfReader(str(pdf_path))
|
|
500
|
+
page_count = len(reader.pages)
|
|
501
|
+
|
|
502
|
+
# Check if it has text content
|
|
503
|
+
has_text = False
|
|
504
|
+
if page_count > 0:
|
|
505
|
+
text = reader.pages[0].extract_text()
|
|
506
|
+
has_text = bool(text and len(text.strip()) > 100)
|
|
507
|
+
|
|
508
|
+
results["valid"].append(
|
|
509
|
+
{
|
|
510
|
+
"path": str(pdf_path),
|
|
511
|
+
"pages": page_count,
|
|
512
|
+
"has_text": has_text,
|
|
513
|
+
"size_kb": round(pdf_path.stat().st_size / 1024, 2),
|
|
514
|
+
}
|
|
515
|
+
)
|
|
516
|
+
except Exception as e:
|
|
517
|
+
results["invalid"].append(
|
|
518
|
+
{
|
|
519
|
+
"path": str(pdf_path),
|
|
520
|
+
"error": str(e),
|
|
521
|
+
}
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
return {
|
|
525
|
+
"success": True,
|
|
526
|
+
**results,
|
|
527
|
+
"valid_count": len(results["valid"]),
|
|
528
|
+
"invalid_count": len(results["invalid"]),
|
|
529
|
+
"timestamp": datetime.now().isoformat(),
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
except ImportError:
|
|
533
|
+
return {"success": False, "error": "PyPDF2 not installed"}
|
|
534
|
+
except Exception as e:
|
|
535
|
+
return {"success": False, "error": str(e)}
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
async def resolve_openurls_handler(
|
|
539
|
+
dois: list[str],
|
|
540
|
+
resolver_url: str | None = None,
|
|
541
|
+
resume: bool = True,
|
|
542
|
+
) -> dict:
|
|
543
|
+
"""Resolve OpenURLs for DOIs."""
|
|
544
|
+
try:
|
|
545
|
+
from scitex.scholar import Scholar
|
|
546
|
+
|
|
547
|
+
loop = asyncio.get_event_loop()
|
|
548
|
+
scholar = Scholar()
|
|
549
|
+
|
|
550
|
+
def do_resolve():
|
|
551
|
+
results = {
|
|
552
|
+
"resolved": [],
|
|
553
|
+
"failed": [],
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
for doi in dois:
|
|
557
|
+
try:
|
|
558
|
+
# Use scholar's OpenURL resolver
|
|
559
|
+
url = scholar.resolve_openurl(doi, resolver_url=resolver_url)
|
|
560
|
+
if url:
|
|
561
|
+
results["resolved"].append({"doi": doi, "url": url})
|
|
562
|
+
else:
|
|
563
|
+
results["failed"].append({"doi": doi, "reason": "No URL found"})
|
|
564
|
+
except Exception as e:
|
|
565
|
+
results["failed"].append({"doi": doi, "reason": str(e)})
|
|
566
|
+
|
|
567
|
+
return results
|
|
568
|
+
|
|
569
|
+
result = await loop.run_in_executor(None, do_resolve)
|
|
570
|
+
|
|
571
|
+
return {
|
|
572
|
+
"success": True,
|
|
573
|
+
**result,
|
|
574
|
+
"total": len(dois),
|
|
575
|
+
"timestamp": datetime.now().isoformat(),
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
except Exception as e:
|
|
579
|
+
return {"success": False, "error": str(e)}
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
async def authenticate_handler(
|
|
583
|
+
method: str,
|
|
584
|
+
institution: str | None = None,
|
|
585
|
+
) -> dict:
|
|
586
|
+
"""Authenticate with institutional access."""
|
|
587
|
+
try:
|
|
588
|
+
from scitex.scholar import ScholarAuthManager
|
|
589
|
+
|
|
590
|
+
loop = asyncio.get_event_loop()
|
|
591
|
+
|
|
592
|
+
def do_auth():
|
|
593
|
+
auth_manager = ScholarAuthManager()
|
|
594
|
+
|
|
595
|
+
if method == "openathens":
|
|
596
|
+
success = auth_manager.authenticate_openathens(institution)
|
|
597
|
+
elif method == "shibboleth":
|
|
598
|
+
success = auth_manager.authenticate_shibboleth(institution)
|
|
599
|
+
else:
|
|
600
|
+
return {"error": f"Unknown auth method: {method}"}
|
|
601
|
+
|
|
602
|
+
return {"authenticated": success}
|
|
603
|
+
|
|
604
|
+
result = await loop.run_in_executor(None, do_auth)
|
|
605
|
+
|
|
606
|
+
return {
|
|
607
|
+
"success": True,
|
|
608
|
+
"method": method,
|
|
609
|
+
"institution": institution,
|
|
610
|
+
**result,
|
|
611
|
+
"timestamp": datetime.now().isoformat(),
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
except Exception as e:
|
|
615
|
+
return {"success": False, "error": str(e)}
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
async def export_papers_handler(
|
|
619
|
+
output_path: str,
|
|
620
|
+
project: str | None = None,
|
|
621
|
+
format: str = "bibtex",
|
|
622
|
+
filter_has_pdf: bool = False,
|
|
623
|
+
) -> dict:
|
|
624
|
+
"""Export papers to various formats."""
|
|
625
|
+
try:
|
|
626
|
+
from scitex.scholar import Scholar
|
|
627
|
+
|
|
628
|
+
loop = asyncio.get_event_loop()
|
|
629
|
+
scholar = Scholar(project=project) if project else Scholar()
|
|
630
|
+
|
|
631
|
+
def do_export():
|
|
632
|
+
# Get papers from library
|
|
633
|
+
papers = scholar.get_library_papers(project=project)
|
|
634
|
+
|
|
635
|
+
if filter_has_pdf:
|
|
636
|
+
papers = [p for p in papers if hasattr(p, "pdf_path") and p.pdf_path]
|
|
637
|
+
|
|
638
|
+
# Export based on format
|
|
639
|
+
out_path = Path(output_path)
|
|
640
|
+
out_path.parent.mkdir(parents=True, exist_ok=True)
|
|
641
|
+
|
|
642
|
+
if format == "bibtex":
|
|
643
|
+
papers.save(str(out_path))
|
|
644
|
+
elif format == "json":
|
|
645
|
+
with open(out_path, "w") as f:
|
|
646
|
+
json.dump([p.to_dict() for p in papers], f, indent=2)
|
|
647
|
+
elif format == "csv":
|
|
648
|
+
import csv
|
|
649
|
+
|
|
650
|
+
with open(out_path, "w", newline="") as f:
|
|
651
|
+
writer = csv.DictWriter(
|
|
652
|
+
f, fieldnames=["title", "authors", "year", "doi", "journal"]
|
|
653
|
+
)
|
|
654
|
+
writer.writeheader()
|
|
655
|
+
for p in papers:
|
|
656
|
+
writer.writerow(
|
|
657
|
+
{
|
|
658
|
+
"title": p.title,
|
|
659
|
+
"authors": (
|
|
660
|
+
"; ".join(p.authors[:3]) if p.authors else ""
|
|
661
|
+
),
|
|
662
|
+
"year": p.year,
|
|
663
|
+
"doi": p.doi,
|
|
664
|
+
"journal": p.journal,
|
|
665
|
+
}
|
|
666
|
+
)
|
|
667
|
+
elif format == "ris":
|
|
668
|
+
papers.save(str(out_path), format="ris")
|
|
669
|
+
|
|
670
|
+
return {"count": len(papers), "path": str(out_path)}
|
|
671
|
+
|
|
672
|
+
result = await loop.run_in_executor(None, do_export)
|
|
673
|
+
|
|
674
|
+
return {
|
|
675
|
+
"success": True,
|
|
676
|
+
"format": format,
|
|
677
|
+
**result,
|
|
678
|
+
"timestamp": datetime.now().isoformat(),
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
except Exception as e:
|
|
682
|
+
return {"success": False, "error": str(e)}
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
# EOF
|