scitex 2.14.0__py3-none-any.whl → 2.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. scitex/__init__.py +71 -17
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +210 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +27 -0
  16. scitex/_mcp_tools/template.py +24 -0
  17. scitex/_mcp_tools/writer.py +17 -210
  18. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  19. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  20. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  21. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  22. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  23. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  24. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  25. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  26. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  27. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  28. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
  29. scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
  30. scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
  31. scitex/audio/README.md +40 -36
  32. scitex/audio/__init__.py +129 -61
  33. scitex/audio/_branding.py +185 -0
  34. scitex/audio/_mcp/__init__.py +32 -0
  35. scitex/audio/_mcp/handlers.py +59 -6
  36. scitex/audio/_mcp/speak_handlers.py +238 -0
  37. scitex/audio/_relay.py +225 -0
  38. scitex/audio/_tts.py +18 -10
  39. scitex/audio/engines/base.py +17 -10
  40. scitex/audio/engines/elevenlabs_engine.py +7 -2
  41. scitex/audio/mcp_server.py +228 -75
  42. scitex/canvas/README.md +1 -1
  43. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  44. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  45. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  46. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  47. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  48. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  49. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  50. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  51. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  52. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  53. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  54. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  55. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  56. scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
  57. scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
  58. scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
  59. scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
  60. scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
  61. scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
  62. scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
  63. scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
  64. scitex/canvas/editor/flask_editor/_core.py +25 -1684
  65. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  66. scitex/cli/__init__.py +38 -43
  67. scitex/cli/audio.py +160 -41
  68. scitex/cli/capture.py +133 -20
  69. scitex/cli/introspect.py +488 -0
  70. scitex/cli/main.py +200 -109
  71. scitex/cli/mcp.py +60 -34
  72. scitex/cli/plt.py +414 -0
  73. scitex/cli/repro.py +15 -8
  74. scitex/cli/resource.py +15 -8
  75. scitex/cli/scholar/__init__.py +154 -8
  76. scitex/cli/scholar/_crossref_scitex.py +296 -0
  77. scitex/cli/scholar/_fetch.py +25 -3
  78. scitex/cli/social.py +355 -0
  79. scitex/cli/stats.py +136 -11
  80. scitex/cli/template.py +129 -12
  81. scitex/cli/tex.py +15 -8
  82. scitex/cli/writer.py +49 -299
  83. scitex/cloud/__init__.py +41 -2
  84. scitex/config/README.md +1 -1
  85. scitex/config/__init__.py +16 -2
  86. scitex/config/_env_registry.py +256 -0
  87. scitex/context/__init__.py +22 -0
  88. scitex/dev/__init__.py +20 -1
  89. scitex/diagram/__init__.py +42 -19
  90. scitex/diagram/mcp_server.py +13 -125
  91. scitex/gen/__init__.py +50 -14
  92. scitex/gen/_list_packages.py +4 -4
  93. scitex/introspect/__init__.py +82 -0
  94. scitex/introspect/_call_graph.py +303 -0
  95. scitex/introspect/_class_hierarchy.py +163 -0
  96. scitex/introspect/_core.py +41 -0
  97. scitex/introspect/_docstring.py +131 -0
  98. scitex/introspect/_examples.py +113 -0
  99. scitex/introspect/_imports.py +271 -0
  100. scitex/{gen/_inspect_module.py → introspect/_list_api.py} +48 -56
  101. scitex/introspect/_mcp/__init__.py +41 -0
  102. scitex/introspect/_mcp/handlers.py +233 -0
  103. scitex/introspect/_members.py +155 -0
  104. scitex/introspect/_resolve.py +89 -0
  105. scitex/introspect/_signature.py +131 -0
  106. scitex/introspect/_source.py +80 -0
  107. scitex/introspect/_type_hints.py +172 -0
  108. scitex/io/_save.py +1 -2
  109. scitex/io/bundle/README.md +1 -1
  110. scitex/logging/_formatters.py +19 -9
  111. scitex/mcp_server.py +98 -5
  112. scitex/os/__init__.py +4 -0
  113. scitex/{gen → os}/_check_host.py +4 -5
  114. scitex/plt/__init__.py +245 -550
  115. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  116. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  117. scitex/plt/gallery/README.md +1 -1
  118. scitex/plt/utils/_hitmap/__init__.py +82 -0
  119. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  120. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  121. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  122. scitex/plt/utils/_hitmap/_constants.py +40 -0
  123. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  124. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  125. scitex/plt/utils/_hitmap/_query.py +113 -0
  126. scitex/plt/utils/_hitmap.py +46 -1616
  127. scitex/plt/utils/_metadata/__init__.py +80 -0
  128. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  129. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  130. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  131. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  132. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  133. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  134. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  135. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  136. scitex/plt/utils/_metadata/_csv.py +416 -0
  137. scitex/plt/utils/_metadata/_detect.py +225 -0
  138. scitex/plt/utils/_metadata/_legend.py +127 -0
  139. scitex/plt/utils/_metadata/_rounding.py +117 -0
  140. scitex/plt/utils/_metadata/_verification.py +202 -0
  141. scitex/schema/README.md +1 -1
  142. scitex/scholar/__init__.py +8 -0
  143. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  144. scitex/scholar/core/Scholar.py +63 -1700
  145. scitex/scholar/core/_mixins/__init__.py +36 -0
  146. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  147. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  148. scitex/scholar/core/_mixins/_loaders.py +103 -0
  149. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  150. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  151. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  152. scitex/scholar/core/_mixins/_savers.py +69 -0
  153. scitex/scholar/core/_mixins/_search.py +103 -0
  154. scitex/scholar/core/_mixins/_services.py +88 -0
  155. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  156. scitex/scholar/crossref_scitex.py +367 -0
  157. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  158. scitex/scholar/examples/00_run_all.sh +120 -0
  159. scitex/scholar/jobs/_executors.py +27 -3
  160. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  161. scitex/scholar/pdf_download/_cli.py +154 -0
  162. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  163. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  164. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  165. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  166. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  167. scitex/scholar/pipelines/_single_steps.py +71 -36
  168. scitex/scholar/storage/_LibraryManager.py +97 -1695
  169. scitex/scholar/storage/_mixins/__init__.py +30 -0
  170. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  171. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  172. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  173. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  174. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  175. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  176. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  177. scitex/security/README.md +3 -3
  178. scitex/session/README.md +1 -1
  179. scitex/session/__init__.py +26 -7
  180. scitex/session/_decorator.py +1 -1
  181. scitex/sh/README.md +1 -1
  182. scitex/sh/__init__.py +7 -4
  183. scitex/social/__init__.py +155 -0
  184. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  185. scitex/stats/_mcp/_handlers/__init__.py +31 -0
  186. scitex/stats/_mcp/_handlers/_corrections.py +113 -0
  187. scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
  188. scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
  189. scitex/stats/_mcp/_handlers/_format.py +94 -0
  190. scitex/stats/_mcp/_handlers/_normality.py +110 -0
  191. scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
  192. scitex/stats/_mcp/_handlers/_power.py +247 -0
  193. scitex/stats/_mcp/_handlers/_recommend.py +102 -0
  194. scitex/stats/_mcp/_handlers/_run_test.py +279 -0
  195. scitex/stats/_mcp/_handlers/_stars.py +48 -0
  196. scitex/stats/_mcp/handlers.py +19 -1171
  197. scitex/stats/auto/_stat_style.py +175 -0
  198. scitex/stats/auto/_style_definitions.py +411 -0
  199. scitex/stats/auto/_styles.py +22 -620
  200. scitex/stats/descriptive/__init__.py +11 -8
  201. scitex/stats/descriptive/_ci.py +39 -0
  202. scitex/stats/power/_power.py +15 -4
  203. scitex/str/__init__.py +2 -1
  204. scitex/str/_title_case.py +63 -0
  205. scitex/template/README.md +1 -1
  206. scitex/template/__init__.py +25 -10
  207. scitex/template/_code_templates.py +147 -0
  208. scitex/template/_mcp/handlers.py +81 -0
  209. scitex/template/_mcp/tool_schemas.py +55 -0
  210. scitex/template/_templates/__init__.py +51 -0
  211. scitex/template/_templates/audio.py +233 -0
  212. scitex/template/_templates/canvas.py +312 -0
  213. scitex/template/_templates/capture.py +268 -0
  214. scitex/template/_templates/config.py +43 -0
  215. scitex/template/_templates/diagram.py +294 -0
  216. scitex/template/_templates/io.py +107 -0
  217. scitex/template/_templates/module.py +53 -0
  218. scitex/template/_templates/plt.py +202 -0
  219. scitex/template/_templates/scholar.py +267 -0
  220. scitex/template/_templates/session.py +130 -0
  221. scitex/template/_templates/session_minimal.py +43 -0
  222. scitex/template/_templates/session_plot.py +67 -0
  223. scitex/template/_templates/session_stats.py +77 -0
  224. scitex/template/_templates/stats.py +323 -0
  225. scitex/template/_templates/writer.py +296 -0
  226. scitex/template/clone_writer_directory.py +5 -5
  227. scitex/ui/_backends/_email.py +10 -2
  228. scitex/ui/_backends/_webhook.py +5 -1
  229. scitex/web/_search_pubmed.py +10 -6
  230. scitex/writer/README.md +1 -1
  231. scitex/writer/__init__.py +43 -34
  232. scitex/writer/_mcp/handlers.py +11 -744
  233. scitex/writer/_mcp/tool_schemas.py +5 -335
  234. scitex-2.15.3.dist-info/METADATA +667 -0
  235. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/RECORD +241 -120
  236. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  237. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  238. scitex/diagram/_compile.py +0 -312
  239. scitex/diagram/_diagram.py +0 -355
  240. scitex/diagram/_mcp/__init__.py +0 -4
  241. scitex/diagram/_mcp/handlers.py +0 -400
  242. scitex/diagram/_mcp/tool_schemas.py +0 -157
  243. scitex/diagram/_presets.py +0 -173
  244. scitex/diagram/_schema.py +0 -182
  245. scitex/diagram/_split.py +0 -278
  246. scitex/gen/_ci.py +0 -12
  247. scitex/gen/_title_case.py +0 -89
  248. scitex/plt/_mcp/__init__.py +0 -4
  249. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  250. scitex/plt/_mcp/_handlers_figure.py +0 -195
  251. scitex/plt/_mcp/_handlers_plot.py +0 -252
  252. scitex/plt/_mcp/_handlers_style.py +0 -219
  253. scitex/plt/_mcp/handlers.py +0 -74
  254. scitex/plt/_mcp/tool_schemas.py +0 -497
  255. scitex/plt/mcp_server.py +0 -231
  256. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  257. scitex/scholar/examples/dev.py +0 -38
  258. scitex-2.14.0.dist-info/METADATA +0 -1238
  259. /scitex/{gen → context}/_detect_environment.py +0 -0
  260. /scitex/{gen → context}/_get_notebook_path.py +0 -0
  261. /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
  262. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/WHEEL +0 -0
  263. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/entry_points.txt +0 -0
  264. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,20 +1,12 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # Timestamp: "2025-10-13 07:54:07 (ywatanabe)"
4
- # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/ScholarPDFDownloader.py
5
- # ----------------------------------------
6
- from __future__ import annotations
7
- import os
8
-
9
- __FILE__ = "./src/scitex/scholar/pdf_download/ScholarPDFDownloader.py"
10
- __DIR__ = os.path.dirname(__FILE__)
11
- # ----------------------------------------
2
+ # Timestamp: "2026-01-22 (ywatanabe)"
3
+ # File: src/scitex/scholar/pdf_download/ScholarPDFDownloader.py
4
+ """PDF downloader with multiple fallback strategies."""
12
5
 
13
- import argparse
6
+ from __future__ import annotations
14
7
 
15
- __FILE__ = __file__
16
8
  import asyncio
17
- import hashlib
9
+ import os
18
10
  import traceback
19
11
  from pathlib import Path
20
12
  from typing import List, Optional, Union
@@ -22,17 +14,14 @@ from typing import List, Optional, Union
22
14
  from playwright.async_api import BrowserContext
23
15
 
24
16
  from scitex import logging
25
- from scitex.browser.debugging import browser_logger
26
17
  from scitex.scholar import ScholarConfig
27
18
  from scitex.scholar.pdf_download.strategies import (
28
- DownloadMonitorAndSync,
29
19
  FlexibleFilenameGenerator,
30
- show_stop_automation_button_async,
20
+ handle_manual_download_on_page_async,
31
21
  try_download_chrome_pdf_viewer_async,
32
22
  try_download_direct_async,
33
- try_download_manual_async,
34
- try_download_response_body_async,
35
23
  try_download_open_access_async,
24
+ try_download_response_body_async,
36
25
  )
37
26
 
38
27
  logger = logging.getLogger(__name__)
@@ -41,31 +30,21 @@ logger = logging.getLogger(__name__)
41
30
  class ScholarPDFDownloader:
42
31
  """Download PDFs from URLs with multiple fallback strategies.
43
32
 
44
- This class focuses solely on downloading PDFs from URLs using various strategies:
33
+ Strategies tried in order:
45
34
  - Chrome PDF Viewer
46
35
  - Direct Download (ERR_ABORTED)
47
36
  - Response Body Extraction
48
37
  - Manual Download Fallback
49
38
 
50
- URL resolution (DOI URL) should be handled by the caller.
51
-
52
- Logging Strategy:
53
- - Uses `logger` for terminal-only logs (batch operations, coordination)
54
- - Uses `await browser_logger` for browser automation logs (visual popups)
55
- - All messages prefixed with self.name for traceability
39
+ URL resolution (DOI -> URL) should be handled by the caller.
56
40
  """
57
41
 
58
- def __init__(
59
- self,
60
- context: BrowserContext,
61
- config: ScholarConfig = None,
62
- ):
42
+ def __init__(self, context: BrowserContext, config: ScholarConfig = None):
63
43
  self.name = self.__class__.__name__
64
44
  self.config = config if config else ScholarConfig()
65
45
  self.context = context
66
46
  self.output_dir = self.config.get_library_downloads_dir()
67
47
 
68
- # Load access preferences from config
69
48
  self.prefer_open_access = self.config.resolve(
70
49
  "prefer_open_access", default=True, type=bool
71
50
  )
@@ -79,27 +58,14 @@ class ScholarPDFDownloader:
79
58
  async def __aexit__(self, exc_type, exc_val, exc_tb):
80
59
  pass
81
60
 
82
- # Main entry points
83
- # ----------------------------------------
84
-
85
61
  async def download_from_urls(
86
62
  self,
87
63
  pdf_urls: List[str],
88
64
  output_dir: Union[str, Path] = None,
89
65
  max_concurrent: int = 3,
90
66
  ) -> List[Path]:
91
- """Download multiple PDFs with parallel processing.
92
-
93
- Args:
94
- pdf_urls: List of PDF URLs to download
95
- output_dir: Output directory for downloaded PDFs
96
- max_concurrent: Maximum number of concurrent downloads (default: 3)
97
-
98
- Returns:
99
- List of paths to suffcessfully downloaded PDFs
100
- """
67
+ """Download multiple PDFs with parallel processing."""
101
68
  output_dir = output_dir or self.output_dir
102
-
103
69
  if not pdf_urls:
104
70
  return []
105
71
 
@@ -108,7 +74,6 @@ class ScholarPDFDownloader:
108
74
  for ii_pdf, pdf_url in enumerate(pdf_urls)
109
75
  ]
110
76
 
111
- # Use semaphore for controlled parallelization
112
77
  semaphore = asyncio.Semaphore(max_concurrent)
113
78
 
114
79
  async def download_with_semaphore(url: str, path: Path, index: int):
@@ -128,7 +93,6 @@ class ScholarPDFDownloader:
128
93
 
129
94
  results = await asyncio.gather(*tasks, return_exceptions=True)
130
95
 
131
- # Filter suffcessful downloads
132
96
  saved_paths = []
133
97
  for result in results:
134
98
  if isinstance(result, Exception):
@@ -136,9 +100,7 @@ class ScholarPDFDownloader:
136
100
  elif result:
137
101
  saved_paths.append(result)
138
102
 
139
- logger.info(
140
- f"{self.name}: Downloaded {len(saved_paths)}/{len(pdf_urls)} PDFs suffcessfully"
141
- )
103
+ logger.info(f"{self.name}: Downloaded {len(saved_paths)}/{len(pdf_urls)} PDFs")
142
104
  return saved_paths
143
105
 
144
106
  async def download_open_access(
@@ -147,20 +109,7 @@ class ScholarPDFDownloader:
147
109
  output_path: Union[str, Path],
148
110
  metadata: Optional[dict] = None,
149
111
  ) -> Optional[Path]:
150
- """Download PDF from an Open Access URL.
151
-
152
- This is a simpler path for known OA papers - no browser automation needed.
153
- Uses direct HTTP download with appropriate handling for different OA sources
154
- (arXiv, PMC, OpenAlex OA URLs, etc.).
155
-
156
- Args:
157
- oa_url: Open Access URL (from paper.metadata.access.oa_url)
158
- output_path: Path to save the downloaded PDF
159
- metadata: Optional paper metadata for logging
160
-
161
- Returns:
162
- Path to downloaded PDF if successful, None otherwise
163
- """
112
+ """Download PDF from an Open Access URL."""
164
113
  if not oa_url:
165
114
  logger.debug(f"{self.name}: No OA URL provided")
166
115
  return None
@@ -183,39 +132,19 @@ class ScholarPDFDownloader:
183
132
  if result:
184
133
  logger.info(f"{self.name}: Successfully downloaded OA PDF to {result}")
185
134
  else:
186
- logger.debug(
187
- f"{self.name}: OA download failed, may need browser-based download"
188
- )
135
+ logger.debug(f"{self.name}: OA download failed")
189
136
 
190
137
  return result
191
138
 
192
139
  async def download_smart(
193
- self,
194
- paper,
195
- output_path: Union[str, Path],
140
+ self, paper, output_path: Union[str, Path]
196
141
  ) -> Optional[Path]:
197
- """Smart download method that chooses the best strategy based on paper metadata.
198
-
199
- Priority order:
200
- 1. Try Open Access URL if available and prefer_open_access is True
201
- 2. Try regular PDF URLs if available
202
- 3. Try paywall access if enable_paywall_access is True and OA failed
203
-
204
- Args:
205
- paper: Paper object with metadata (from scitex.scholar.core.Paper)
206
- output_path: Path to save the downloaded PDF
207
-
208
- Returns:
209
- Path to downloaded PDF if successful, None otherwise
210
- """
211
- from scitex.scholar.core.Paper import Paper
212
-
142
+ """Smart download choosing best strategy based on paper metadata."""
213
143
  if isinstance(output_path, str):
214
144
  output_path = Path(output_path)
215
145
  if not str(output_path).endswith(".pdf"):
216
146
  output_path = Path(str(output_path) + ".pdf")
217
147
 
218
- # Extract metadata
219
148
  meta = paper.metadata if hasattr(paper, "metadata") else paper
220
149
  access = getattr(meta, "access", None)
221
150
  url_meta = getattr(meta, "url", None)
@@ -233,7 +162,6 @@ class ScholarPDFDownloader:
233
162
  logger.info(f"{self.name}: Trying Open Access URL first")
234
163
  result = await self.download_open_access(oa_url, output_path)
235
164
  if result:
236
- # Update access metadata to record successful OA download
237
165
  if access and self.track_paywall_attempts:
238
166
  access.paywall_bypass_attempted = False
239
167
  return result
@@ -253,7 +181,6 @@ class ScholarPDFDownloader:
253
181
  if access and self.track_paywall_attempts:
254
182
  access.paywall_bypass_attempted = True
255
183
 
256
- # Use DOI-based URL if available
257
184
  if doi:
258
185
  doi_url = f"https://doi.org/{doi}"
259
186
  result = await self.download_from_url(doi_url, output_path, doi=doi)
@@ -261,9 +188,8 @@ class ScholarPDFDownloader:
261
188
  if access and self.track_paywall_attempts:
262
189
  access.paywall_bypass_success = True
263
190
  return result
264
- else:
265
- if access and self.track_paywall_attempts:
266
- access.paywall_bypass_success = False
191
+ elif access and self.track_paywall_attempts:
192
+ access.paywall_bypass_success = False
267
193
 
268
194
  logger.warning(f"{self.name}: All download strategies exhausted for DOI={doi}")
269
195
  return None
@@ -274,12 +200,7 @@ class ScholarPDFDownloader:
274
200
  output_path: Union[str, Path],
275
201
  doi: Optional[str] = None,
276
202
  ) -> Optional[Path]:
277
- """Main download method with manual override support.
278
-
279
- Shows manual download button immediately - if clicked, switches to manual mode.
280
- Otherwise tries automated download strategies.
281
- """
282
-
203
+ """Main download method with manual override support."""
283
204
  if not pdf_url:
284
205
  logger.warning(f"{self.name}: PDF URL passed but not valid: {pdf_url}")
285
206
  return None
@@ -290,41 +211,26 @@ class ScholarPDFDownloader:
290
211
  output_path = Path(str(output_path) + ".pdf")
291
212
  output_path.parent.mkdir(parents=True, exist_ok=True)
292
213
 
293
- # Generate target filename for button display
294
214
  target_filename = FlexibleFilenameGenerator.generate_filename(
295
- doi=doi,
296
- url=pdf_url,
297
- content_type="main",
215
+ doi=doi, url=pdf_url, content_type="main"
298
216
  )
299
217
 
300
- # Create stop event for manual mode
301
218
  stop_event = asyncio.Event()
219
+ self.context._scitex_is_manual_mode = False
220
+ self.context._scitex_manual_mode_event = stop_event
302
221
 
303
- # Add manual mode flag to context (shared across all strategies)
304
- self.context._scitex_is_manual_mode = False # Flag strategies can check
305
- self.context._scitex_manual_mode_event = (
306
- stop_event # Event for internal monitoring
307
- )
308
-
309
- # Inject manual mode button script into ALL pages in this context
310
- # This ensures button appears on every page, even after redirects
311
222
  from scitex.scholar.pdf_download.strategies.manual_download_utils import (
312
223
  get_manual_button_init_script,
313
224
  )
314
225
 
315
226
  button_script = get_manual_button_init_script(target_filename)
316
227
  await self.context.add_init_script(button_script)
317
- logger.info(
318
- f"{self.name}: Manual mode button injected into browser context (appears on ALL pages)"
319
- )
228
+ logger.info(f"{self.name}: Manual mode button injected into browser context")
320
229
 
321
- # Create manual mode monitoring (will be used if user presses 'M')
322
230
  button_task = None
323
231
  pdf_page = None
324
232
 
325
- # Define download strategies with their names
326
233
  async def chrome_pdf_wrapper(url, path):
327
- # Chrome PDF strategy creates its own page
328
234
  return await try_download_chrome_pdf_viewer_async(
329
235
  self.context, url, path, self.name
330
236
  )
@@ -338,8 +244,6 @@ class ScholarPDFDownloader:
338
244
  )
339
245
 
340
246
  async def manual_fallback_wrapper(url, path):
341
- # Don't run manual download in the loop - it's handled separately after
342
- # if stop_event is set
343
247
  return None
344
248
 
345
249
  try_download_methods = [
@@ -350,81 +254,61 @@ class ScholarPDFDownloader:
350
254
  ]
351
255
 
352
256
  for method_name, method_func in try_download_methods:
353
- # Check if user activated manual mode - STOP ALL AUTOMATION IMMEDIATELY
354
257
  if stop_event.is_set():
355
- logger.info(
356
- f"{self.name}: User activated manual mode - stopping all automation"
357
- )
258
+ logger.info(f"{self.name}: Manual mode - stopping automation")
358
259
  break
359
260
 
360
261
  logger.info(f"{self.name}: Trying method: {method_name}")
361
262
 
362
- # Pass stop_event to strategies so they can check it periodically
363
263
  try:
364
- # Check before starting
365
264
  if stop_event.is_set():
366
- logger.info(
367
- f"{self.name}: Manual mode activated, skipping {method_name}"
368
- )
265
+ logger.info(f"{self.name}: Manual mode, skipping {method_name}")
369
266
  break
370
267
 
371
- # Run the method - it should check stop_event periodically
372
268
  is_downloaded = await method_func(pdf_url, output_path)
373
269
 
374
- # Check after completing
375
270
  if stop_event.is_set():
376
- logger.info(
377
- f"{self.name}: Manual mode activated during {method_name}"
378
- )
271
+ logger.info(f"{self.name}: Manual mode during {method_name}")
379
272
  break
380
273
 
381
274
  if is_downloaded:
382
- # Clean up
383
275
  if button_task:
384
276
  button_task.cancel()
385
277
  if pdf_page:
386
278
  await pdf_page.close()
387
- logger.info(
388
- f"{self.name}: Suffcessfully downloaded via {method_name}"
389
- )
390
- return is_downloaded # Return the actual path from the strategy
279
+ logger.info(f"{self.name}: Downloaded via {method_name}")
280
+ return is_downloaded
391
281
  else:
392
- logger.debug(
393
- f"{self.name}: {method_name} returned None (failed or not applicable)"
394
- )
282
+ logger.debug(f"{self.name}: {method_name} returned None")
395
283
  except Exception as e:
396
284
  logger.warning(f"{self.name}: {method_name} raised exception: {e}")
397
285
  logger.debug(f"{self.name}: Traceback: {traceback.format_exc()}")
398
286
 
399
- # If user chose manual download or all automation failed
287
+ # Handle manual download if user chose it
400
288
  if stop_event.is_set():
401
- # Set context flag so all strategies know we're in manual mode
402
289
  self.context._scitex_is_manual_mode = True
403
-
404
- logger.info(
405
- f"{self.name}: User chose manual download - starting monitoring"
406
- )
407
- # Cancel button task
290
+ logger.info(f"{self.name}: User chose manual download - starting")
408
291
  if button_task:
409
292
  button_task.cancel()
410
293
 
411
- # Open page for manual download if not already open
412
294
  if not pdf_page:
413
295
  pdf_page = await self.context.new_page()
414
296
  await pdf_page.goto(
415
297
  pdf_url, timeout=30000, wait_until="domcontentloaded"
416
298
  )
417
299
 
418
- result = await self._handle_manual_download_async(
300
+ result = await handle_manual_download_on_page_async(
419
301
  pdf_page,
420
302
  pdf_url,
421
303
  output_path,
304
+ func_name=self.name,
305
+ config=self.config,
422
306
  doi=doi,
423
307
  )
424
308
  await pdf_page.close()
425
309
  return result
426
310
 
427
- # All methods failed - clean up
311
+ # All methods failed
428
312
  if button_task:
429
313
  button_task.cancel()
430
314
  if pdf_page:
@@ -432,273 +316,11 @@ class ScholarPDFDownloader:
432
316
  logger.fail(f"{self.name}: All download methods failed for {pdf_url}")
433
317
  return None
434
318
 
435
- # Helper functions
436
- # ----------------------------------------
437
-
438
- async def _handle_manual_download_async(
439
- self, page, pdf_url: str, output_path: Path, doi: Optional[str] = None
440
- ) -> Optional[Path]:
441
- """
442
- Handle manual download workflow when automation is stopped by user.
443
-
444
- Args:
445
- page: Playwright page where stop button was clicked
446
- pdf_url: URL of the PDF
447
- output_path: Target output path
448
- doi: Optional DOI for filename generation
449
-
450
- Returns:
451
- Path to downloaded file, or None if failed
452
- """
453
-
454
- # Get directories from config
455
- # IMPORTANT: Manual download should ONLY save to downloads dir
456
- # MASTER organization (8-digit IDs) is handled by storage module
457
- temp_downloads_dir = self.config.get_library_downloads_dir()
458
- final_pdfs_dir = self.config.get_library_downloads_dir() # NOT MASTER!
459
-
460
- # Extract DOI from URL if not provided
461
- if not doi and "doi.org/" in pdf_url:
462
- doi = pdf_url.split("doi.org/")[-1].split("?")[0].split("#")[0]
463
-
464
- await browser_logger.info(
465
- page,
466
- f"{self.name}: Manual download mode activated",
467
- )
468
-
469
- # Page is already navigated to PDF URL (done in download_from_url)
470
- # Just show instructions
471
- await browser_logger.info(
472
- page,
473
- f"{self.name}: Please download the PDF manually from this page",
474
- )
475
-
476
- # Run complete manual download workflow (without showing button again)
477
- # The button was already shown and clicked to trigger this
478
- monitor = DownloadMonitorAndSync(temp_downloads_dir, final_pdfs_dir)
479
-
480
- # Create logger function for progress reporting (must be sync, not async)
481
- def log_progress(msg: str):
482
- logger.info(f"{self.name}: {msg}")
483
-
484
- # Monitor for new download with progress reporting (2 minutes)
485
- # Long timeouts cause process accumulation - keep it short
486
- temp_file = await monitor.monitor_for_new_download_async(
487
- timeout_sec=120, # 2 minutes to download
488
- logger_func=log_progress,
489
- )
490
-
491
- if not temp_file:
492
- await browser_logger.error(
493
- page,
494
- f"{self.name}: No new PDF detected in downloads directory",
495
- )
496
- return None
497
-
498
- await browser_logger.info(
499
- page,
500
- f"{self.name}: Detected PDF: {temp_file.name} ({temp_file.stat().st_size / 1e6:.1f} MB)",
501
- )
502
-
503
- # Keep UUID filename as-is in downloads directory
504
- # Orchestration layer will handle metadata extraction and MASTER organization
505
-
506
- # Save minimal metadata header (DOI only - no PDF parsing)
507
- if doi:
508
- import json
509
-
510
- metadata_file = temp_file.parent / f"{temp_file.name}.meta.json"
511
- metadata = {
512
- "doi": doi,
513
- "pdf_url": pdf_url,
514
- "pdf_file": temp_file.name,
515
- }
516
- with open(metadata_file, "w") as f:
517
- json.dump(metadata, f, indent=2)
518
-
519
- await browser_logger.info(
520
- page,
521
- f"{self.name}: Manual download complete - saved in downloads/",
522
- )
523
-
524
- logger.info(f"{self.name}: PDF: {temp_file}")
525
- if doi:
526
- logger.info(
527
- f"{self.name}: DOI: {doi} (saved in {temp_file.name}.meta.json)"
528
- )
529
-
530
- # Return the UUID file path (in downloads directory)
531
- return temp_file
532
-
533
-
534
- async def main_async(args):
535
- """Example usage showing decoupled URL resolution and downloading."""
536
- from scitex.scholar import (
537
- ScholarAuthManager,
538
- ScholarBrowserManager,
539
- ScholarURLFinder,
540
- )
541
- from scitex.scholar.auth import AuthenticationGateway
542
-
543
- # ---------------------------------------
544
- # Context Preparation
545
- # ---------------------------------------
546
- # Authenticated Browser and Context
547
- auth_manager = ScholarAuthManager()
548
- browser_manager = ScholarBrowserManager(
549
- chrome_profile_name="system",
550
- browser_mode=args.browser_mode,
551
- auth_manager=auth_manager,
552
- use_zenrows_proxy=False,
553
- )
554
- (
555
- browser,
556
- context,
557
- ) = await browser_manager.get_authenticated_browser_and_context_async()
558
-
559
- # Authentication Gateway
560
- auth_gateway = AuthenticationGateway(
561
- auth_manager=auth_manager,
562
- browser_manager=browser_manager,
563
- )
564
- url_context = await auth_gateway.prepare_context_async(
565
- doi=args.doi, context=context
566
- )
567
-
568
- # ---------------------------------------
569
- # Step 1: URL Resolution (separate from downloading)
570
- # ---------------------------------------
571
- url_finder = ScholarURLFinder(context)
572
-
573
- # Use the resolved URL from auth_gateway to avoid duplicate OpenURL resolution
574
- resolved_url = url_context.url if url_context else None
575
- if resolved_url:
576
- logger.info(f"{__name__}: Using resolved URL from auth_gateway: {resolved_url}")
577
- urls = await url_finder.find_pdf_urls(resolved_url)
578
- else:
579
- logger.info(f"{__name__}: No resolved URL, using DOI: {args.doi}")
580
- urls = await url_finder.find_pdf_urls(args.doi) # Will resolve DOI internally
581
-
582
- # Extract URL strings from list of dicts
583
- pdf_urls = []
584
- for entry in urls:
585
- if isinstance(entry, dict):
586
- pdf_urls.append(entry.get("url"))
587
- elif isinstance(entry, str):
588
- pdf_urls.append(entry)
589
-
590
- if not pdf_urls:
591
- logger.error(f"No PDF URLs found for DOI: {args.doi}")
592
- return
593
-
594
- logger.info(f"Found {len(pdf_urls)} PDF URL(s) for DOI: {args.doi}")
595
-
596
- # ---------------------------------------
597
- # Step 2: PDF Download (URL-only, decoupled from DOI resolution)
598
- # ---------------------------------------
599
- pdf_downloader = ScholarPDFDownloader(context)
600
-
601
- if len(pdf_urls) == 1:
602
- # Single URL - direct download
603
- await pdf_downloader.download_from_url(pdf_urls[0], args.output)
604
- else:
605
- # Multiple URLs - batch download with parallelization
606
- output_dir = Path(args.output).parent
607
- await pdf_downloader.download_from_urls(
608
- pdf_urls,
609
- output_dir=output_dir,
610
- max_concurrent=3,
611
- )
612
-
613
-
614
- def main(args):
615
- import asyncio
616
-
617
- asyncio.run(main_async(args))
618
-
619
- return 0
620
-
621
-
622
- def parse_args() -> argparse.Namespace:
623
- """Parse command line arguments."""
624
- parser = argparse.ArgumentParser(
625
- description="Download a PDF using DOI with authentication support"
626
- )
627
- parser.add_argument(
628
- "--doi",
629
- type=str,
630
- required=True,
631
- help="DOI of the paper (e.g., 10.1088/1741-2552/aaf92e)",
632
- )
633
- parser.add_argument(
634
- "--output",
635
- type=str,
636
- default="~/.scitex/scholar/library/downloads/downloaded_paper.pdf",
637
- help="Output path for the PDF (default: ~/.scitex/scholar/library/downloads/downloaded_paper.pdf)",
638
- )
639
- parser.add_argument(
640
- "--browser-mode",
641
- type=str,
642
- choices=["stealth", "interactive"],
643
- default="stealth",
644
- help="Browser mode (default: stealth)",
645
- )
646
-
647
- args = parser.parse_args()
648
- return args
649
-
650
-
651
- def run_main() -> None:
652
- """Initialize scitex framework, run main function, and cleanup."""
653
- global CONFIG, CC, sys, plt, rng
654
-
655
- import sys
656
-
657
- import matplotlib.pyplot as plt
658
-
659
- import scitex as stx
660
-
661
- args = parse_args()
662
-
663
- CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
664
- sys,
665
- plt,
666
- args=args,
667
- file=__FILE__,
668
- sdir_suffix=None,
669
- verbose=False,
670
- agg=True,
671
- )
672
-
673
- exit_status = main(args)
674
-
675
- stx.session.close(
676
- CONFIG,
677
- verbose=False,
678
- notify=False,
679
- message="",
680
- exit_status=exit_status,
681
- )
682
-
683
319
 
320
+ # CLI entry point moved to _cli.py
684
321
  if __name__ == "__main__":
685
- run_main()
686
-
687
- """
688
- python -m scitex.scholar.download.ScholarPDFDownloader \
689
- --browser-mode interactive \
690
- --doi "10.1016/j.clinph.2024.09.017"
691
-
692
- python -m scitex.scholar.download.ScholarPDFDownloader \
693
- --browser-mode interactive \
694
- --doi "10.1212/wnl.0000000000200348"
322
+ from scitex.scholar.pdf_download._cli import run_main
695
323
 
696
-
697
- # This seems calling URL Resolution on OpenURL twice
698
-
699
- --doi "10.3389/fnins.2024.1417748"
700
- --doi "10.1016/j.clinph.2024.09.017"
701
-
702
- """
324
+ run_main()
703
325
 
704
326
  # EOF