scitex 2.11.0__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. scitex/__main__.py +24 -5
  2. scitex/__version__.py +1 -1
  3. scitex/_optional_deps.py +33 -0
  4. scitex/ai/classification/reporters/_ClassificationReporter.py +1 -1
  5. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +2 -2
  6. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +2 -2
  7. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +2 -2
  8. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +2 -2
  9. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +2 -2
  10. scitex/ai/classification/timeseries/_normalize_timestamp.py +1 -1
  11. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +1 -1
  12. scitex/ai/plt/_plot_feature_importance.py +1 -1
  13. scitex/ai/plt/_plot_learning_curve.py +1 -1
  14. scitex/ai/plt/_plot_optuna_study.py +1 -1
  15. scitex/ai/plt/_plot_pre_rec_curve.py +1 -1
  16. scitex/ai/plt/_plot_roc_curve.py +1 -1
  17. scitex/ai/plt/_stx_conf_mat.py +1 -1
  18. scitex/ai/training/_LearningCurveLogger.py +1 -1
  19. scitex/audio/mcp_server.py +38 -8
  20. scitex/browser/automation/CookieHandler.py +1 -1
  21. scitex/browser/core/BrowserMixin.py +1 -1
  22. scitex/browser/core/ChromeProfileManager.py +1 -1
  23. scitex/browser/debugging/_browser_logger.py +1 -1
  24. scitex/browser/debugging/_highlight_element.py +1 -1
  25. scitex/browser/debugging/_show_grid.py +1 -1
  26. scitex/browser/interaction/click_center.py +1 -1
  27. scitex/browser/interaction/click_with_fallbacks.py +1 -1
  28. scitex/browser/interaction/close_popups.py +1 -1
  29. scitex/browser/interaction/fill_with_fallbacks.py +1 -1
  30. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +1 -1
  31. scitex/browser/pdf/detect_chrome_pdf_viewer.py +1 -1
  32. scitex/browser/stealth/HumanBehavior.py +1 -1
  33. scitex/browser/stealth/StealthManager.py +1 -1
  34. scitex/canvas/_mcp_handlers.py +372 -0
  35. scitex/canvas/_mcp_tool_schemas.py +219 -0
  36. scitex/canvas/mcp_server.py +151 -0
  37. scitex/capture/mcp_server.py +41 -12
  38. scitex/cli/audio.py +233 -0
  39. scitex/cli/capture.py +307 -0
  40. scitex/cli/main.py +27 -4
  41. scitex/cli/repro.py +233 -0
  42. scitex/cli/resource.py +240 -0
  43. scitex/cli/stats.py +325 -0
  44. scitex/cli/template.py +236 -0
  45. scitex/cli/tex.py +286 -0
  46. scitex/cli/web.py +11 -12
  47. scitex/dev/__init__.py +3 -0
  48. scitex/dev/_pyproject.py +405 -0
  49. scitex/dev/plt/__init__.py +2 -2
  50. scitex/dev/plt/mpl/get_dir_ax.py +1 -1
  51. scitex/dev/plt/mpl/get_signatures.py +1 -1
  52. scitex/dev/plt/mpl/get_signatures_details.py +1 -1
  53. scitex/diagram/_mcp_handlers.py +400 -0
  54. scitex/diagram/_mcp_tool_schemas.py +157 -0
  55. scitex/diagram/mcp_server.py +151 -0
  56. scitex/dsp/_demo_sig.py +51 -5
  57. scitex/dsp/_mne.py +13 -2
  58. scitex/dsp/_modulation_index.py +15 -3
  59. scitex/dsp/_pac.py +23 -5
  60. scitex/dsp/_psd.py +16 -4
  61. scitex/dsp/_resample.py +24 -4
  62. scitex/dsp/_transform.py +16 -3
  63. scitex/dsp/add_noise.py +15 -1
  64. scitex/dsp/norm.py +17 -2
  65. scitex/dsp/reference.py +17 -1
  66. scitex/dsp/utils/_differential_bandpass_filters.py +20 -2
  67. scitex/dsp/utils/_zero_pad.py +18 -4
  68. scitex/dt/_normalize_timestamp.py +1 -1
  69. scitex/git/_session.py +1 -1
  70. scitex/io/_load_modules/_con.py +12 -1
  71. scitex/io/_load_modules/_eeg.py +12 -1
  72. scitex/io/_load_modules/_optuna.py +21 -63
  73. scitex/io/_load_modules/_torch.py +11 -3
  74. scitex/io/_save_modules/_optuna_study_as_csv_and_pngs.py +13 -2
  75. scitex/io/_save_modules/_torch.py +11 -3
  76. scitex/mcp_server.py +159 -0
  77. scitex/plt/_mcp_handlers.py +361 -0
  78. scitex/plt/_mcp_tool_schemas.py +169 -0
  79. scitex/plt/mcp_server.py +205 -0
  80. scitex/repro/README_RandomStateManager.md +3 -3
  81. scitex/repro/_RandomStateManager.py +14 -14
  82. scitex/repro/_gen_ID.py +1 -1
  83. scitex/repro/_gen_timestamp.py +1 -1
  84. scitex/repro/_hash_array.py +4 -4
  85. scitex/scholar/__main__.py +24 -2
  86. scitex/scholar/_mcp_handlers.py +685 -0
  87. scitex/scholar/_mcp_tool_schemas.py +339 -0
  88. scitex/scholar/docs/template.py +1 -1
  89. scitex/scholar/examples/07_storage_integration.py +1 -1
  90. scitex/scholar/impact_factor/jcr/ImpactFactorJCREngine.py +1 -1
  91. scitex/scholar/impact_factor/jcr/build_database.py +1 -1
  92. scitex/scholar/mcp_server.py +315 -0
  93. scitex/scholar/pdf_download/ScholarPDFDownloader.py +1 -1
  94. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +1 -1
  95. scitex/scholar/pipelines/ScholarPipelineParallel.py +1 -1
  96. scitex/scholar/pipelines/ScholarPipelineSingle.py +1 -1
  97. scitex/scholar/storage/PaperIO.py +1 -1
  98. scitex/session/README.md +4 -4
  99. scitex/session/__init__.py +1 -1
  100. scitex/session/_decorator.py +9 -9
  101. scitex/session/_lifecycle.py +5 -5
  102. scitex/session/template.py +1 -1
  103. scitex/stats/__main__.py +281 -0
  104. scitex/stats/_mcp_handlers.py +1191 -0
  105. scitex/stats/_mcp_tool_schemas.py +384 -0
  106. scitex/stats/correct/_correct_bonferroni.py +1 -1
  107. scitex/stats/correct/_correct_fdr.py +1 -1
  108. scitex/stats/correct/_correct_fdr_.py +1 -1
  109. scitex/stats/correct/_correct_holm.py +1 -1
  110. scitex/stats/correct/_correct_sidak.py +1 -1
  111. scitex/stats/effect_sizes/_cliffs_delta.py +1 -1
  112. scitex/stats/effect_sizes/_cohens_d.py +1 -1
  113. scitex/stats/effect_sizes/_epsilon_squared.py +1 -1
  114. scitex/stats/effect_sizes/_eta_squared.py +1 -1
  115. scitex/stats/effect_sizes/_prob_superiority.py +1 -1
  116. scitex/stats/mcp_server.py +405 -0
  117. scitex/stats/posthoc/_dunnett.py +1 -1
  118. scitex/stats/posthoc/_games_howell.py +1 -1
  119. scitex/stats/posthoc/_tukey_hsd.py +1 -1
  120. scitex/stats/power/_power.py +1 -1
  121. scitex/stats/utils/_effect_size.py +1 -1
  122. scitex/stats/utils/_formatters.py +1 -1
  123. scitex/stats/utils/_power.py +1 -1
  124. scitex/template/_mcp_handlers.py +259 -0
  125. scitex/template/_mcp_tool_schemas.py +112 -0
  126. scitex/template/mcp_server.py +186 -0
  127. scitex/utils/_verify_scitex_format.py +2 -2
  128. scitex/utils/template.py +1 -1
  129. scitex/web/__init__.py +12 -11
  130. scitex/web/_scraping.py +26 -265
  131. scitex/web/download_images.py +316 -0
  132. scitex/writer/Writer.py +1 -1
  133. scitex/writer/_clone_writer_project.py +1 -1
  134. scitex/writer/_validate_tree_structures.py +1 -1
  135. scitex/writer/dataclasses/config/_WriterConfig.py +1 -1
  136. scitex/writer/dataclasses/contents/_ManuscriptContents.py +1 -1
  137. scitex/writer/dataclasses/core/_Document.py +1 -1
  138. scitex/writer/dataclasses/core/_DocumentSection.py +1 -1
  139. scitex/writer/dataclasses/results/_CompilationResult.py +1 -1
  140. scitex/writer/dataclasses/results/_LaTeXIssue.py +1 -1
  141. scitex/writer/utils/.legacy_git_retry.py +7 -5
  142. scitex/writer/utils/_parse_latex_logs.py +1 -1
  143. {scitex-2.11.0.dist-info → scitex-2.13.0.dist-info}/METADATA +431 -269
  144. {scitex-2.11.0.dist-info → scitex-2.13.0.dist-info}/RECORD +147 -118
  145. scitex-2.13.0.dist-info/entry_points.txt +11 -0
  146. scitex-2.11.0.dist-info/entry_points.txt +0 -2
  147. {scitex-2.11.0.dist-info → scitex-2.13.0.dist-info}/WHEEL +0 -0
  148. {scitex-2.11.0.dist-info → scitex-2.13.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,685 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: 2026-01-08
3
+ # File: src/scitex/scholar/_mcp_handlers.py
4
+ # ----------------------------------------
5
+
6
+ """Handler implementations for the scitex-scholar MCP server."""
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import json
12
+ import os
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+
16
+ # Public handler coroutines exported by this module; the underscore-prefixed
+ # helpers defined below are intentionally not listed.
+ __all__ = [
17
+ "search_papers_handler",
18
+ "resolve_dois_handler",
19
+ "enrich_bibtex_handler",
20
+ "download_pdf_handler",
21
+ "download_pdfs_batch_handler",
22
+ "get_library_status_handler",
23
+ "parse_bibtex_handler",
24
+ "validate_pdfs_handler",
25
+ "resolve_openurls_handler",
26
+ "authenticate_handler",
27
+ "export_papers_handler",
28
+ ]
29
+
30
+
31
def _get_scholar_dir() -> Path:
    """Return the scholar data directory, creating it if it is missing.

    The base directory comes from the ``SCITEX_DIR`` environment variable and
    falls back to ``~/.scitex``; the returned path is its ``scholar``
    subdirectory.
    """
    fallback_root = Path.home() / ".scitex"
    root = Path(os.getenv("SCITEX_DIR", fallback_root))
    target = root / "scholar"
    target.mkdir(parents=True, exist_ok=True)
    return target
37
+
38
+
39
def _ensure_scholar():
    """Import ``Scholar`` lazily and return a new instance.

    Raises:
        RuntimeError: If an ``ImportError`` occurs while importing or
            constructing ``Scholar``. The original ``ImportError`` is chained
            as ``__cause__`` so the root cause stays visible in tracebacks.
    """
    try:
        from scitex.scholar import Scholar

        return Scholar()
    except ImportError as e:
        # Chain explicitly so the failing import is reported as the cause.
        raise RuntimeError(f"Scholar module not available: {e}") from e
47
+
48
+
49
async def search_papers_handler(
    query: str,
    sources: list[str] | None = None,
    limit: int = 20,
    year_min: int | None = None,
    year_max: int | None = None,
) -> dict:
    """Run ``Scholar.search`` in the default executor and summarise the hits.

    ``limit`` is always forwarded; ``sources``, ``year_min`` and ``year_max``
    are forwarded only when truthy. Each hit is reduced to a plain dict with
    at most five authors and an abstract cut to 300 characters plus ``"..."``.

    Returns:
        ``{"success": True, "count", "query", "papers", "timestamp"}`` on
        success, or ``{"success": False, "error": <message>}`` when any
        exception is raised.
    """
    try:
        from scitex.scholar import Scholar

        loop = asyncio.get_event_loop()
        scholar = Scholar()

        optional_filters = {
            "sources": sources,
            "year_min": year_min,
            "year_max": year_max,
        }
        search_kwargs = {"limit": limit}
        search_kwargs.update(
            {name: value for name, value in optional_filters.items() if value}
        )

        def _run_search():
            return scholar.search(query, **search_kwargs)

        def _shorten(abstract):
            # Long abstracts are clipped; short or missing ones pass through.
            if abstract and len(abstract) > 300:
                return abstract[:300] + "..."
            return abstract

        found = await loop.run_in_executor(None, _run_search)

        summaries = [
            {
                "title": hit.title,
                "authors": hit.authors[:5] if hit.authors else [],
                "year": hit.year,
                "doi": hit.doi,
                "journal": hit.journal,
                "abstract": _shorten(hit.abstract),
                "citation_count": hit.citation_count,
                "impact_factor": hit.impact_factor,
            }
            for hit in found
        ]

        return {
            "success": True,
            "count": len(summaries),
            "query": query,
            "papers": summaries,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
106
+
107
+
108
async def resolve_dois_handler(
    bibtex_path: str | None = None,
    titles: list[str] | None = None,
    resume: bool = True,
    project: str | None = None,
) -> dict:
    """Resolve DOIs for the papers of a BibTeX file or for a list of titles.

    ``bibtex_path`` takes precedence over ``titles`` when both are given.
    Papers loaded from BibTeX that already carry a DOI are reported as
    resolved without a lookup.

    Args:
        bibtex_path: BibTeX file to load through ``Scholar.from_bibtex``.
        titles: Paper titles to look up through ``Scholar.resolve_doi``.
        resume: Accepted for interface compatibility; not used by this handler.
        project: When truthy, passed to ``Scholar(project=...)``.

    Returns:
        ``{"success": True, "resolved", "failed", "total", "timestamp"}`` on
        success, or ``{"success": False, "error": <message>}`` when no input
        was supplied or an exception is raised.
    """
    # Reject an empty request up front. This case used to be reported as
    # {"success": True, "error": ...}, which reads as a success to callers.
    if not bibtex_path and not titles:
        return {"success": False, "error": "Either bibtex_path or titles required"}

    try:
        from scitex.scholar import Scholar

        loop = asyncio.get_running_loop()
        scholar = Scholar(project=project) if project else Scholar()

        def do_resolve():
            resolved = []
            failed = []

            if bibtex_path:
                papers = scholar.from_bibtex(bibtex_path)

                for paper in papers:
                    if paper.doi:
                        resolved.append({"title": paper.title, "doi": paper.doi})
                        continue

                    # Per-paper failures are recorded rather than aborting
                    # the whole batch.
                    try:
                        doi = scholar.resolve_doi(
                            paper.title, paper.authors, paper.year
                        )
                        if doi:
                            paper.doi = doi
                            resolved.append({"title": paper.title, "doi": doi})
                        else:
                            failed.append(
                                {"title": paper.title, "reason": "No DOI found"}
                            )
                    except Exception as e:
                        failed.append({"title": paper.title, "reason": str(e)})

                return {"resolved": resolved, "failed": failed, "total": len(papers)}

            for title in titles:
                try:
                    doi = scholar.resolve_doi(title)
                    if doi:
                        resolved.append({"title": title, "doi": doi})
                    else:
                        failed.append({"title": title, "reason": "No DOI found"})
                except Exception as e:
                    failed.append({"title": title, "reason": str(e)})

            return {"resolved": resolved, "failed": failed, "total": len(titles)}

        result = await loop.run_in_executor(None, do_resolve)

        return {
            "success": True,
            **result,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
178
+
179
+
180
async def enrich_bibtex_handler(
    bibtex_path: str,
    output_path: str | None = None,
    add_abstracts: bool = True,
    add_citations: bool = True,
    add_impact_factors: bool = True,
) -> dict:
    """Enrich the entries of a BibTeX file and save the result.

    Papers are loaded with ``Scholar.from_bibtex``, passed through
    ``Scholar.enrich`` and written with the collection's ``save`` method.

    Args:
        bibtex_path: Input BibTeX file.
        output_path: Destination file. When omitted, ``-enriched`` is inserted
            before the input file's suffix (``refs.bib`` ->
            ``refs-enriched.bib``).
        add_abstracts: Accepted for interface compatibility; not forwarded to
            ``Scholar.enrich`` by this handler.
        add_citations: Same as ``add_abstracts``.
        add_impact_factors: Same as ``add_abstracts``.

    Returns:
        ``{"success": True, "output_path", "summary", "timestamp"}`` on
        success, or ``{"success": False, "error": <message>}`` when any
        exception is raised.
    """
    try:
        from scitex.scholar import Scholar

        loop = asyncio.get_running_loop()
        scholar = Scholar()

        def do_enrich():
            papers = scholar.enrich(scholar.from_bibtex(bibtex_path))

            if output_path:
                out_path = output_path
            else:
                # Only the file name is rewritten. The previous
                # str.replace(".bib", ...) also changed directory names that
                # contain ".bib" and returned the input path unchanged (so the
                # source file was overwritten) when the suffix was not ".bib".
                src = Path(bibtex_path)
                out_path = str(src.with_name(f"{src.stem}-enriched{src.suffix}"))
            papers.save(out_path)

            summary = {
                "total": len(papers),
                "with_doi": sum(1 for p in papers if p.doi),
                "with_abstract": sum(1 for p in papers if p.abstract),
                "with_citations": sum(1 for p in papers if p.citation_count),
                "with_impact_factor": sum(1 for p in papers if p.impact_factor),
            }

            return {"output_path": out_path, "summary": summary}

        result = await loop.run_in_executor(None, do_enrich)

        return {
            "success": True,
            **result,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
241
+
242
+
243
async def download_pdf_handler(
    doi: str,
    output_dir: str = "./pdfs",
    auth_method: str = "none",
    use_browser: bool = False,
) -> dict:
    """Download the PDF for one DOI via ``Scholar.download_pdf``.

    ``output_dir`` is created if needed and the download runs in the default
    executor. ``auth_method`` is accepted but not used by this handler.

    Returns:
        ``{"success": True, "doi", "path", "timestamp"}`` when the download
        returns a truthy result; otherwise ``{"success": False, "doi",
        "error"}`` with either ``"Download failed"`` or the exception text.
    """
    try:
        from scitex.scholar import Scholar
        from scitex.scholar.core import Paper

        loop = asyncio.get_event_loop()
        scholar = Scholar()

        def _fetch():
            target = Paper(doi=doi)
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            return scholar.download_pdf(
                target,
                output_dir=output_dir,
                use_browser=use_browser,
            )

        saved = await loop.run_in_executor(None, _fetch)

        if not saved:
            return {"success": False, "doi": doi, "error": "Download failed"}

        return {
            "success": True,
            "doi": doi,
            "path": str(saved),
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "doi": doi, "error": str(e)}
287
+
288
+
289
async def download_pdfs_batch_handler(
    dois: list[str] | None = None,
    bibtex_path: str | None = None,
    project: str | None = None,
    output_dir: str | None = None,
    max_concurrent: int = 3,
    resume: bool = True,
) -> dict:
    """Download PDFs for several papers, one after another.

    Papers come from ``bibtex_path`` when given, otherwise from ``dois``.
    Papers without a DOI are skipped; per-paper download errors are recorded
    in ``failed`` instead of aborting the batch.

    Args:
        dois: DOIs to download.
        bibtex_path: BibTeX file to load papers from (takes precedence).
        project: When truthy, passed to ``Scholar(project=...)`` and used as
            the library subdirectory; otherwise ``"default"`` is used.
        output_dir: Target directory. Defaults to
            ``<scholar dir>/library/<project or "default">/pdfs``.
        max_concurrent: Accepted for interface compatibility; not used, the
            downloads in this handler run sequentially.
        resume: Accepted for interface compatibility; not used.

    Returns:
        ``{"success": True, "total", "downloaded", "failed", "skipped",
        "timestamp"}`` on success, or ``{"success": False, "error":
        <message>}`` when no input was supplied or an exception is raised.
    """
    # Reject an empty request up front. This case used to be reported as
    # {"success": True, "error": ...}, which reads as a success to callers.
    if not bibtex_path and not dois:
        return {"success": False, "error": "Either dois or bibtex_path required"}

    try:
        from scitex.scholar import Scholar
        from scitex.scholar.core import Paper

        loop = asyncio.get_running_loop()
        scholar = Scholar(project=project) if project else Scholar()

        def do_batch_download():
            if bibtex_path:
                papers = scholar.from_bibtex(bibtex_path)
            else:
                papers = [Paper(doi=d) for d in dois]

            out_dir = output_dir or str(
                _get_scholar_dir() / "library" / (project or "default") / "pdfs"
            )
            Path(out_dir).mkdir(parents=True, exist_ok=True)

            results = {
                "total": len(papers),
                "downloaded": [],
                "failed": [],
                "skipped": [],
            }

            for paper in papers:
                if not paper.doi:
                    results["skipped"].append(
                        {"title": paper.title, "reason": "No DOI"}
                    )
                    continue

                try:
                    pdf_path = scholar.download_pdf(paper, output_dir=out_dir)
                    if pdf_path:
                        results["downloaded"].append(
                            {"doi": paper.doi, "path": str(pdf_path)}
                        )
                    else:
                        results["failed"].append(
                            {"doi": paper.doi, "reason": "Download returned None"}
                        )
                except Exception as e:
                    results["failed"].append({"doi": paper.doi, "reason": str(e)})

            return results

        result = await loop.run_in_executor(None, do_batch_download)

        return {
            "success": True,
            **result,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
370
+
371
+
372
async def get_library_status_handler(
    project: str | None = None,
    include_details: bool = False,
) -> dict:
    """Report how many PDFs and metadata entries the library contains.

    Args:
        project: Library subdirectory to inspect; the whole library directory
            is inspected when omitted.
        include_details: When true, add an ``entries`` list built from up to
            50 ``metadata.json`` files.

    Returns:
        ``{"success": True, "exists": False, "message"}`` when the directory
        is missing; otherwise ``{"success": True, "exists": True, "path",
        "pdf_count", "entry_count", "timestamp"}`` plus ``entries`` when
        requested. Any exception yields ``{"success": False, "error"}``.
    """
    try:
        library_dir = _get_scholar_dir() / "library"
        project_dir = library_dir / project if project else library_dir

        if not project_dir.exists():
            return {
                "success": True,
                "exists": False,
                "message": f"Library directory not found: {project_dir}",
            }

        pdf_files = list(project_dir.rglob("*.pdf"))
        metadata_files = list(project_dir.rglob("metadata.json"))

        status = {
            "success": True,
            "exists": True,
            "path": str(project_dir),
            "pdf_count": len(pdf_files),
            "entry_count": len(metadata_files),
            "timestamp": datetime.now().isoformat(),
        }

        if include_details:
            entries = []
            for meta_file in metadata_files[:50]:  # Limit to 50 for performance
                entry_dir = meta_file.parent
                try:
                    with open(meta_file, encoding="utf-8") as fh:
                        meta = json.load(fh)
                except (OSError, ValueError):
                    # Best effort: unreadable or malformed metadata is skipped.
                    # ValueError covers JSON and Unicode decoding errors.
                    continue
                if not isinstance(meta, dict):
                    continue

                # glob() already yields paths prefixed with entry_dir; joining
                # them onto entry_dir again (as before) produced a wrong path
                # whenever the library directory was relative.
                has_pdf = any(p.exists() for p in entry_dir.glob("*.pdf"))
                entries.append(
                    {
                        "id": entry_dir.name,
                        "title": meta.get("title", "Unknown"),
                        "doi": meta.get("doi"),
                        "has_pdf": has_pdf,
                    }
                )

            status["entries"] = entries

        return status

    except Exception as e:
        return {"success": False, "error": str(e)}
433
+
434
+
435
async def parse_bibtex_handler(bibtex_path: str) -> dict:
    """Load a BibTeX file with ``Scholar.from_bibtex`` and list its entries.

    Each entry is reduced to title, at most five authors, year, DOI, journal
    and ``bibtex_key`` (``None`` when the paper has no such attribute).

    Returns:
        ``{"success": True, "count", "path", "papers", "timestamp"}`` on
        success, or ``{"success": False, "error": <message>}`` when any
        exception is raised.
    """
    try:
        from scitex.scholar import Scholar

        loop = asyncio.get_event_loop()
        scholar = Scholar()

        loaded = await loop.run_in_executor(
            None, lambda: scholar.from_bibtex(bibtex_path)
        )

        entries = [
            {
                "title": item.title,
                "authors": item.authors[:5] if item.authors else [],
                "year": item.year,
                "doi": item.doi,
                "journal": item.journal,
                "bibtex_key": getattr(item, "bibtex_key", None),
            }
            for item in loaded
        ]

        return {
            "success": True,
            "count": len(entries),
            "path": bibtex_path,
            "papers": entries,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
472
+
473
+
474
async def validate_pdfs_handler(
    project: str | None = None,
    pdf_paths: list[str] | None = None,
) -> dict:
    """Check that PDF files can be opened with ``PyPDF2``.

    The files checked are ``pdf_paths`` when given, otherwise every ``*.pdf``
    under the project's library directory, otherwise every ``*.pdf`` under the
    whole library. A file counts as valid when ``PdfReader`` opens it and its
    pages can be counted; ``has_text`` is true when the first page yields more
    than 100 non-blank characters.

    Returns:
        ``{"success": True, "total", "valid", "invalid", "valid_count",
        "invalid_count", "timestamp"}`` on success,
        ``{"success": False, "error": "PyPDF2 not installed"}`` when the
        import fails, or ``{"success": False, "error": <message>}`` for any
        other exception.
    """
    try:
        from PyPDF2 import PdfReader
    except ImportError:
        return {"success": False, "error": "PyPDF2 not installed"}

    try:

        def do_validate():
            if pdf_paths:
                paths = [Path(p) for p in pdf_paths]
            elif project:
                paths = list((_get_scholar_dir() / "library" / project).rglob("*.pdf"))
            else:
                paths = list((_get_scholar_dir() / "library").rglob("*.pdf"))

            results = {"total": len(paths), "valid": [], "invalid": []}

            for pdf_path in paths:
                try:
                    reader = PdfReader(str(pdf_path))
                    page_count = len(reader.pages)

                    has_text = False
                    if page_count > 0:
                        text = reader.pages[0].extract_text()
                        has_text = bool(text and len(text.strip()) > 100)

                    results["valid"].append(
                        {
                            "path": str(pdf_path),
                            "pages": page_count,
                            "has_text": has_text,
                            "size_kb": round(pdf_path.stat().st_size / 1024, 2),
                        }
                    )
                except Exception as e:
                    # Any failure to open or read marks this file invalid.
                    results["invalid"].append({"path": str(pdf_path), "error": str(e)})

            return results

        # Directory walking and PDF parsing are blocking, so they run in the
        # default executor like the other handlers' blocking work.
        loop = asyncio.get_running_loop()
        results = await loop.run_in_executor(None, do_validate)

        return {
            "success": True,
            **results,
            "valid_count": len(results["valid"]),
            "invalid_count": len(results["invalid"]),
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
536
+
537
+
538
async def resolve_openurls_handler(
    dois: list[str],
    resolver_url: str | None = None,
    resume: bool = True,
) -> dict:
    """Look up an OpenURL for each DOI via ``Scholar.resolve_openurl``.

    Lookups run one after another in the default executor. A DOI whose lookup
    raises or returns a falsy value is listed under ``failed`` with a reason.
    ``resume`` is accepted but not used by this handler.

    Returns:
        ``{"success": True, "resolved", "failed", "total", "timestamp"}`` on
        success, or ``{"success": False, "error": <message>}`` when any
        exception escapes the per-DOI handling.
    """
    try:
        from scitex.scholar import Scholar

        loop = asyncio.get_event_loop()
        scholar = Scholar()

        def _resolve_all():
            hits = []
            misses = []

            for doi in dois:
                try:
                    url = scholar.resolve_openurl(doi, resolver_url=resolver_url)
                except Exception as e:
                    misses.append({"doi": doi, "reason": str(e)})
                    continue

                if url:
                    hits.append({"doi": doi, "url": url})
                else:
                    misses.append({"doi": doi, "reason": "No URL found"})

            return {"resolved": hits, "failed": misses}

        outcome = await loop.run_in_executor(None, _resolve_all)

        return {
            "success": True,
            **outcome,
            "total": len(dois),
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
580
+
581
+
582
async def authenticate_handler(
    method: str,
    institution: str | None = None,
) -> dict:
    """Authenticate through ``ScholarAuthManager`` using the given method.

    Args:
        method: ``"openathens"`` or ``"shibboleth"``.
        institution: Passed unchanged to the selected authenticate call.

    Returns:
        ``{"success": True, "method", "institution", "authenticated",
        "timestamp"}`` when the authenticate call completes; ``authenticated``
        carries that call's return value. An unknown method or any exception
        yields ``{"success": False, "error": <message>}``.
    """
    # Reject unsupported methods up front. This case used to be reported as
    # {"success": True, ..., "error": ...}, which reads as a success.
    if method not in ("openathens", "shibboleth"):
        return {"success": False, "error": f"Unknown auth method: {method}"}

    try:
        from scitex.scholar import ScholarAuthManager

        loop = asyncio.get_running_loop()

        def do_auth():
            auth_manager = ScholarAuthManager()

            if method == "openathens":
                success = auth_manager.authenticate_openathens(institution)
            else:
                success = auth_manager.authenticate_shibboleth(institution)

            return {"authenticated": success}

        result = await loop.run_in_executor(None, do_auth)

        return {
            "success": True,
            "method": method,
            "institution": institution,
            **result,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
616
+
617
+
618
async def export_papers_handler(
    output_path: str,
    project: str | None = None,
    format: str = "bibtex",
    filter_has_pdf: bool = False,
) -> dict:
    """Export the library's papers to a file.

    Args:
        output_path: Destination file; parent directories are created.
        project: When truthy, passed to ``Scholar(project=...)``; always
            passed to ``get_library_papers``.
        format: One of ``"bibtex"``, ``"json"``, ``"csv"`` or ``"ris"``.
        filter_has_pdf: Keep only papers with a truthy ``pdf_path``.

    Returns:
        ``{"success": True, "format", "count", "path", "timestamp"}`` on
        success, or ``{"success": False, "error": <message>}`` for an
        unsupported format or any exception.
    """
    # Reject unsupported formats before touching the file system. Previously
    # nothing was written for them, yet the handler reported success.
    if format not in ("bibtex", "json", "csv", "ris"):
        return {"success": False, "error": f"Unsupported export format: {format}"}

    try:
        from scitex.scholar import Scholar

        loop = asyncio.get_running_loop()
        scholar = Scholar(project=project) if project else Scholar()

        def do_export():
            papers = scholar.get_library_papers(project=project)

            if filter_has_pdf:
                # NOTE(review): this turns the collection into a plain list,
                # so the papers.save(...) calls below would presumably fail
                # for "bibtex"/"ris" -- confirm against the collection's API.
                papers = [p for p in papers if hasattr(p, "pdf_path") and p.pdf_path]

            out_path = Path(output_path)
            out_path.parent.mkdir(parents=True, exist_ok=True)

            if format == "bibtex":
                papers.save(str(out_path))
            elif format == "json":
                with open(out_path, "w", encoding="utf-8") as f:
                    json.dump([p.to_dict() for p in papers], f, indent=2)
            elif format == "csv":
                import csv

                with open(out_path, "w", newline="", encoding="utf-8") as f:
                    writer = csv.DictWriter(
                        f, fieldnames=["title", "authors", "year", "doi", "journal"]
                    )
                    writer.writeheader()
                    for p in papers:
                        writer.writerow(
                            {
                                "title": p.title,
                                # Only the first three authors are exported.
                                "authors": (
                                    "; ".join(p.authors[:3]) if p.authors else ""
                                ),
                                "year": p.year,
                                "doi": p.doi,
                                "journal": p.journal,
                            }
                        )
            else:  # "ris"
                papers.save(str(out_path), format="ris")

            return {"count": len(papers), "path": str(out_path)}

        result = await loop.run_in_executor(None, do_export)

        return {
            "success": True,
            "format": format,
            **result,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
683
+
684
+
685
+ # EOF