scitex 2.14.0__py3-none-any.whl → 2.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. scitex/__init__.py +71 -17
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +210 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +27 -0
  16. scitex/_mcp_tools/template.py +24 -0
  17. scitex/_mcp_tools/writer.py +17 -210
  18. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  19. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  20. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  21. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  22. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  23. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  24. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  25. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  26. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  27. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  28. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
  29. scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
  30. scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
  31. scitex/audio/README.md +40 -36
  32. scitex/audio/__init__.py +129 -61
  33. scitex/audio/_branding.py +185 -0
  34. scitex/audio/_mcp/__init__.py +32 -0
  35. scitex/audio/_mcp/handlers.py +59 -6
  36. scitex/audio/_mcp/speak_handlers.py +238 -0
  37. scitex/audio/_relay.py +225 -0
  38. scitex/audio/_tts.py +18 -10
  39. scitex/audio/engines/base.py +17 -10
  40. scitex/audio/engines/elevenlabs_engine.py +7 -2
  41. scitex/audio/mcp_server.py +228 -75
  42. scitex/canvas/README.md +1 -1
  43. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  44. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  45. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  46. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  47. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  48. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  49. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  50. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  51. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  52. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  53. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  54. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  55. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  56. scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
  57. scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
  58. scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
  59. scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
  60. scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
  61. scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
  62. scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
  63. scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
  64. scitex/canvas/editor/flask_editor/_core.py +25 -1684
  65. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  66. scitex/cli/__init__.py +38 -43
  67. scitex/cli/audio.py +160 -41
  68. scitex/cli/capture.py +133 -20
  69. scitex/cli/introspect.py +488 -0
  70. scitex/cli/main.py +200 -109
  71. scitex/cli/mcp.py +60 -34
  72. scitex/cli/plt.py +414 -0
  73. scitex/cli/repro.py +15 -8
  74. scitex/cli/resource.py +15 -8
  75. scitex/cli/scholar/__init__.py +154 -8
  76. scitex/cli/scholar/_crossref_scitex.py +296 -0
  77. scitex/cli/scholar/_fetch.py +25 -3
  78. scitex/cli/social.py +355 -0
  79. scitex/cli/stats.py +136 -11
  80. scitex/cli/template.py +129 -12
  81. scitex/cli/tex.py +15 -8
  82. scitex/cli/writer.py +49 -299
  83. scitex/cloud/__init__.py +41 -2
  84. scitex/config/README.md +1 -1
  85. scitex/config/__init__.py +16 -2
  86. scitex/config/_env_registry.py +256 -0
  87. scitex/context/__init__.py +22 -0
  88. scitex/dev/__init__.py +20 -1
  89. scitex/diagram/__init__.py +42 -19
  90. scitex/diagram/mcp_server.py +13 -125
  91. scitex/gen/__init__.py +50 -14
  92. scitex/gen/_list_packages.py +4 -4
  93. scitex/introspect/__init__.py +82 -0
  94. scitex/introspect/_call_graph.py +303 -0
  95. scitex/introspect/_class_hierarchy.py +163 -0
  96. scitex/introspect/_core.py +41 -0
  97. scitex/introspect/_docstring.py +131 -0
  98. scitex/introspect/_examples.py +113 -0
  99. scitex/introspect/_imports.py +271 -0
  100. scitex/{gen/_inspect_module.py → introspect/_list_api.py} +48 -56
  101. scitex/introspect/_mcp/__init__.py +41 -0
  102. scitex/introspect/_mcp/handlers.py +233 -0
  103. scitex/introspect/_members.py +155 -0
  104. scitex/introspect/_resolve.py +89 -0
  105. scitex/introspect/_signature.py +131 -0
  106. scitex/introspect/_source.py +80 -0
  107. scitex/introspect/_type_hints.py +172 -0
  108. scitex/io/_save.py +1 -2
  109. scitex/io/bundle/README.md +1 -1
  110. scitex/logging/_formatters.py +19 -9
  111. scitex/mcp_server.py +98 -5
  112. scitex/os/__init__.py +4 -0
  113. scitex/{gen → os}/_check_host.py +4 -5
  114. scitex/plt/__init__.py +245 -550
  115. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  116. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  117. scitex/plt/gallery/README.md +1 -1
  118. scitex/plt/utils/_hitmap/__init__.py +82 -0
  119. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  120. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  121. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  122. scitex/plt/utils/_hitmap/_constants.py +40 -0
  123. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  124. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  125. scitex/plt/utils/_hitmap/_query.py +113 -0
  126. scitex/plt/utils/_hitmap.py +46 -1616
  127. scitex/plt/utils/_metadata/__init__.py +80 -0
  128. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  129. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  130. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  131. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  132. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  133. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  134. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  135. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  136. scitex/plt/utils/_metadata/_csv.py +416 -0
  137. scitex/plt/utils/_metadata/_detect.py +225 -0
  138. scitex/plt/utils/_metadata/_legend.py +127 -0
  139. scitex/plt/utils/_metadata/_rounding.py +117 -0
  140. scitex/plt/utils/_metadata/_verification.py +202 -0
  141. scitex/schema/README.md +1 -1
  142. scitex/scholar/__init__.py +8 -0
  143. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  144. scitex/scholar/core/Scholar.py +63 -1700
  145. scitex/scholar/core/_mixins/__init__.py +36 -0
  146. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  147. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  148. scitex/scholar/core/_mixins/_loaders.py +103 -0
  149. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  150. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  151. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  152. scitex/scholar/core/_mixins/_savers.py +69 -0
  153. scitex/scholar/core/_mixins/_search.py +103 -0
  154. scitex/scholar/core/_mixins/_services.py +88 -0
  155. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  156. scitex/scholar/crossref_scitex.py +367 -0
  157. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  158. scitex/scholar/examples/00_run_all.sh +120 -0
  159. scitex/scholar/jobs/_executors.py +27 -3
  160. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  161. scitex/scholar/pdf_download/_cli.py +154 -0
  162. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  163. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  164. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  165. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  166. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  167. scitex/scholar/pipelines/_single_steps.py +71 -36
  168. scitex/scholar/storage/_LibraryManager.py +97 -1695
  169. scitex/scholar/storage/_mixins/__init__.py +30 -0
  170. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  171. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  172. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  173. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  174. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  175. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  176. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  177. scitex/security/README.md +3 -3
  178. scitex/session/README.md +1 -1
  179. scitex/session/__init__.py +26 -7
  180. scitex/session/_decorator.py +1 -1
  181. scitex/sh/README.md +1 -1
  182. scitex/sh/__init__.py +7 -4
  183. scitex/social/__init__.py +155 -0
  184. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  185. scitex/stats/_mcp/_handlers/__init__.py +31 -0
  186. scitex/stats/_mcp/_handlers/_corrections.py +113 -0
  187. scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
  188. scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
  189. scitex/stats/_mcp/_handlers/_format.py +94 -0
  190. scitex/stats/_mcp/_handlers/_normality.py +110 -0
  191. scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
  192. scitex/stats/_mcp/_handlers/_power.py +247 -0
  193. scitex/stats/_mcp/_handlers/_recommend.py +102 -0
  194. scitex/stats/_mcp/_handlers/_run_test.py +279 -0
  195. scitex/stats/_mcp/_handlers/_stars.py +48 -0
  196. scitex/stats/_mcp/handlers.py +19 -1171
  197. scitex/stats/auto/_stat_style.py +175 -0
  198. scitex/stats/auto/_style_definitions.py +411 -0
  199. scitex/stats/auto/_styles.py +22 -620
  200. scitex/stats/descriptive/__init__.py +11 -8
  201. scitex/stats/descriptive/_ci.py +39 -0
  202. scitex/stats/power/_power.py +15 -4
  203. scitex/str/__init__.py +2 -1
  204. scitex/str/_title_case.py +63 -0
  205. scitex/template/README.md +1 -1
  206. scitex/template/__init__.py +25 -10
  207. scitex/template/_code_templates.py +147 -0
  208. scitex/template/_mcp/handlers.py +81 -0
  209. scitex/template/_mcp/tool_schemas.py +55 -0
  210. scitex/template/_templates/__init__.py +51 -0
  211. scitex/template/_templates/audio.py +233 -0
  212. scitex/template/_templates/canvas.py +312 -0
  213. scitex/template/_templates/capture.py +268 -0
  214. scitex/template/_templates/config.py +43 -0
  215. scitex/template/_templates/diagram.py +294 -0
  216. scitex/template/_templates/io.py +107 -0
  217. scitex/template/_templates/module.py +53 -0
  218. scitex/template/_templates/plt.py +202 -0
  219. scitex/template/_templates/scholar.py +267 -0
  220. scitex/template/_templates/session.py +130 -0
  221. scitex/template/_templates/session_minimal.py +43 -0
  222. scitex/template/_templates/session_plot.py +67 -0
  223. scitex/template/_templates/session_stats.py +77 -0
  224. scitex/template/_templates/stats.py +323 -0
  225. scitex/template/_templates/writer.py +296 -0
  226. scitex/template/clone_writer_directory.py +5 -5
  227. scitex/ui/_backends/_email.py +10 -2
  228. scitex/ui/_backends/_webhook.py +5 -1
  229. scitex/web/_search_pubmed.py +10 -6
  230. scitex/writer/README.md +1 -1
  231. scitex/writer/__init__.py +43 -34
  232. scitex/writer/_mcp/handlers.py +11 -744
  233. scitex/writer/_mcp/tool_schemas.py +5 -335
  234. scitex-2.15.3.dist-info/METADATA +667 -0
  235. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/RECORD +241 -120
  236. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  237. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  238. scitex/diagram/_compile.py +0 -312
  239. scitex/diagram/_diagram.py +0 -355
  240. scitex/diagram/_mcp/__init__.py +0 -4
  241. scitex/diagram/_mcp/handlers.py +0 -400
  242. scitex/diagram/_mcp/tool_schemas.py +0 -157
  243. scitex/diagram/_presets.py +0 -173
  244. scitex/diagram/_schema.py +0 -182
  245. scitex/diagram/_split.py +0 -278
  246. scitex/gen/_ci.py +0 -12
  247. scitex/gen/_title_case.py +0 -89
  248. scitex/plt/_mcp/__init__.py +0 -4
  249. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  250. scitex/plt/_mcp/_handlers_figure.py +0 -195
  251. scitex/plt/_mcp/_handlers_plot.py +0 -252
  252. scitex/plt/_mcp/_handlers_style.py +0 -219
  253. scitex/plt/_mcp/handlers.py +0 -74
  254. scitex/plt/_mcp/tool_schemas.py +0 -497
  255. scitex/plt/mcp_server.py +0 -231
  256. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  257. scitex/scholar/examples/dev.py +0 -38
  258. scitex-2.14.0.dist-info/METADATA +0 -1238
  259. /scitex/{gen → context}/_detect_environment.py +0 -0
  260. /scitex/{gen → context}/_get_notebook_path.py +0 -0
  261. /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
  262. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/WHEEL +0 -0
  263. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/entry_points.txt +0 -0
  264. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,154 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-22 (ywatanabe)"
3
+ # File: src/scitex/scholar/pdf_download/_cli.py
4
+ """CLI entry point for ScholarPDFDownloader."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import argparse
9
+ import asyncio
10
+ from pathlib import Path
11
+
12
+ from scitex import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ __FILE__ = __file__
17
+
18
+
19
async def main_async(args):
    """Resolve PDF URLs for a DOI and download them via an authenticated browser.

    The steps are deliberately decoupled: authentication, URL resolution and
    downloading are handled by separate scholar components.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``doi``, ``output`` and ``browser_mode``.

    Returns
    -------
    None. When no PDF URL can be found an error is logged and the function
    returns early without downloading anything.
    """
    from scitex.scholar import (
        ScholarAuthManager,
        ScholarBrowserManager,
        ScholarURLFinder,
    )
    from scitex.scholar.auth import AuthenticationGateway
    from scitex.scholar.pdf_download import ScholarPDFDownloader

    # Authenticated Browser and Context
    auth_manager = ScholarAuthManager()
    browser_manager = ScholarBrowserManager(
        chrome_profile_name="system",
        browser_mode=args.browser_mode,
        auth_manager=auth_manager,
        use_zenrows_proxy=False,
    )
    # Only the context is needed below; the browser handle itself is unused.
    (
        _browser,
        context,
    ) = await browser_manager.get_authenticated_browser_and_context_async()

    # Authentication Gateway
    auth_gateway = AuthenticationGateway(
        auth_manager=auth_manager,
        browser_manager=browser_manager,
    )
    url_context = await auth_gateway.prepare_context_async(
        doi=args.doi, context=context
    )

    # URL Resolution: prefer the URL resolved by the gateway, else the raw DOI
    url_finder = ScholarURLFinder(context)
    resolved_url = url_context.url if url_context else None
    if resolved_url:
        logger.info(f"Using resolved URL from auth_gateway: {resolved_url}")
        urls = await url_finder.find_pdf_urls(resolved_url)
    else:
        logger.info(f"No resolved URL, using DOI: {args.doi}")
        urls = await url_finder.find_pdf_urls(args.doi)

    # Extract URL strings from a list of dicts and/or plain strings.
    # Entries of any other type are ignored, as before.
    candidates = []
    for entry in urls:
        if isinstance(entry, dict):
            candidates.append(entry.get("url"))
        elif isinstance(entry, str):
            candidates.append(entry)
    # Drop missing/empty URLs so a dict without a "url" key cannot hand
    # None to the downloader.
    pdf_urls = [url for url in candidates if url]

    if not pdf_urls:
        logger.error(f"No PDF URLs found for DOI: {args.doi}")
        return

    logger.info(f"Found {len(pdf_urls)} PDF URL(s) for DOI: {args.doi}")

    # The default --output starts with "~"; expand it here because the shell
    # does not expand argparse defaults.
    output_path = Path(args.output).expanduser()

    # PDF Download
    pdf_downloader = ScholarPDFDownloader(context)
    if len(pdf_urls) == 1:
        await pdf_downloader.download_from_url(pdf_urls[0], str(output_path))
    else:
        await pdf_downloader.download_from_urls(
            pdf_urls, output_dir=output_path.parent, max_concurrent=3
        )
84
+
85
+
86
def main(args):
    """Drive the async download workflow from synchronous code.

    Args:
        args: Parsed command-line namespace, forwarded to ``main_async``.

    Returns
    -------
    Always ``0``; the result of ``main_async`` is not inspected.
    """
    workflow = main_async(args)
    asyncio.run(workflow)
    return 0
89
+
90
+
91
+ def parse_args() -> argparse.Namespace:
92
+ """Parse command line arguments."""
93
+ parser = argparse.ArgumentParser(
94
+ description="Download a PDF using DOI with authentication support"
95
+ )
96
+ parser.add_argument(
97
+ "--doi",
98
+ type=str,
99
+ required=True,
100
+ help="DOI of the paper (e.g., 10.1088/1741-2552/aaf92e)",
101
+ )
102
+ parser.add_argument(
103
+ "--output",
104
+ type=str,
105
+ default="~/.scitex/scholar/library/downloads/downloaded_paper.pdf",
106
+ help="Output path for the PDF",
107
+ )
108
+ parser.add_argument(
109
+ "--browser-mode",
110
+ type=str,
111
+ choices=["stealth", "interactive"],
112
+ default="stealth",
113
+ help="Browser mode (default: stealth)",
114
+ )
115
+ return parser.parse_args()
116
+
117
+
118
def run_main() -> None:
    """Initialize scitex framework, run main function, and cleanup.

    ``stx.session.start`` returns replacements for ``sys.stdout`` and
    ``sys.stderr`` which are installed here, so the session is closed in a
    ``finally`` block: it is closed even if ``main`` raises, and the
    exception then propagates unchanged.
    """
    global CONFIG, CC, sys, plt, rng

    import sys

    import matplotlib.pyplot as plt

    import scitex as stx

    args = parse_args()

    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
        sys,
        plt,
        args=args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )

    # Assume failure until main() returns its own status.
    # NOTE(review): 1 is used as the failure status to mirror main()'s
    # 0-for-success; confirm against stx.session.close.
    exit_status = 1
    try:
        exit_status = main(args)
    finally:
        stx.session.close(
            CONFIG,
            verbose=False,
            notify=False,
            message="",
            exit_status=exit_status,
        )
149
+
150
+
151
+ if __name__ == "__main__":
152
+ run_main()
153
+
154
+ # EOF
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """PDF Download Strategies
4
3
 
5
4
  This module contains different strategies for downloading PDFs from academic publishers.
@@ -9,21 +8,24 @@ Each strategy is tried in sequence until one succeeds.
9
8
  # Download strategies
10
9
  from .chrome_pdf_viewer import try_download_chrome_pdf_viewer_async
11
10
  from .direct_download import try_download_direct_async
12
- from .response_body import try_download_response_body_async
13
- from .manual_download_fallback import try_download_manual_async
14
- from .open_access_download import (
15
- try_download_open_access_async,
16
- try_download_open_access_sync,
11
+ from .manual_download_fallback import (
12
+ handle_manual_download_on_page_async,
13
+ try_download_manual_async,
17
14
  )
18
15
 
19
16
  # Manual download utilities
20
17
  from .manual_download_utils import (
21
18
  DownloadMonitorAndSync,
22
19
  FlexibleFilenameGenerator,
23
- show_stop_automation_button_async,
24
- show_manual_download_button_async,
25
20
  complete_manual_download_workflow_async,
21
+ show_manual_download_button_async,
22
+ show_stop_automation_button_async,
23
+ )
24
+ from .open_access_download import (
25
+ try_download_open_access_async,
26
+ try_download_open_access_sync,
26
27
  )
28
+ from .response_body import try_download_response_body_async
27
29
 
28
30
  __all__ = [
29
31
  # Download strategies
@@ -33,6 +35,7 @@ __all__ = [
33
35
  "try_download_manual_async",
34
36
  "try_download_open_access_async",
35
37
  "try_download_open_access_sync",
38
+ "handle_manual_download_on_page_async",
36
39
  # Manual download utilities
37
40
  "DownloadMonitorAndSync",
38
41
  "FlexibleFilenameGenerator",
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  # Timestamp: "2025-10-13 08:00:08 (ywatanabe)"
4
3
  # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/strategies/manual_download_fallback.py
5
4
  # ----------------------------------------
6
5
  from __future__ import annotations
6
+
7
7
  import os
8
8
 
9
9
  __FILE__ = "./src/scitex/scholar/pdf_download/strategies/manual_download_fallback.py"
@@ -21,7 +21,6 @@ from scitex.scholar import ScholarConfig
21
21
  from scitex.scholar.browser import browser_logger
22
22
  from scitex.scholar.pdf_download.strategies.manual_download_utils import (
23
23
  DownloadMonitorAndSync,
24
- complete_manual_download_workflow_async,
25
24
  )
26
25
 
27
26
  logger = logging.getLogger(__name__)
@@ -51,7 +50,8 @@ async def try_download_manual_async(
51
50
  config: Scholar configuration
52
51
  doi: Optional DOI for filename generation
53
52
 
54
- Returns:
53
+ Returns
54
+ -------
55
55
  Path to downloaded file, or None if failed
56
56
  """
57
57
  config = config or ScholarConfig()
@@ -164,4 +164,81 @@ async def try_download_manual_async(
164
164
  return None
165
165
 
166
166
 
167
async def handle_manual_download_on_page_async(
    page,
    pdf_url: str,
    output_path: Path,
    func_name: str = "handle_manual_download",
    config: Optional[ScholarConfig] = None,
    doi: Optional[str] = None,
) -> Optional[Path]:
    """Handle manual download on an already-open page.

    Unlike try_download_manual_async, this uses an existing page
    (e.g., from the stop automation button workflow).

    Args:
        page: Already-open Playwright page
        pdf_url: URL of the PDF
        output_path: Target output path. Accepted for signature parity but
            not used here; the detected file is left where the download
            monitor reports it.
        func_name: Prefix used in every log message emitted by this call
        config: Scholar configuration (a default ScholarConfig is created
            when omitted)
        doi: Optional DOI for metadata; derived from ``pdf_url`` when it
            contains ``doi.org/`` and no DOI was given

    Returns
    -------
    Path to downloaded file, or None if failed
    """
    config = config or ScholarConfig()
    downloads_dir = config.get_library_downloads_dir()

    # Extract DOI from URL if not provided (strip query string and fragment)
    if not doi and "doi.org/" in pdf_url:
        doi = pdf_url.split("doi.org/")[-1].split("?")[0].split("#")[0]

    await browser_logger.info(page, f"{func_name}: Manual download mode activated")
    await browser_logger.info(
        page, f"{func_name}: Please download the PDF manually from this page"
    )

    # Monitor for download
    monitor = DownloadMonitorAndSync(downloads_dir, downloads_dir)

    def log_progress(msg: str):
        logger.info(f"{func_name}: {msg}")

    temp_file = await monitor.monitor_for_new_download_async(
        timeout_sec=120, logger_func=log_progress
    )

    if not temp_file:
        await browser_logger.error(
            page, f"{func_name}: No new PDF detected in downloads directory"
        )
        return None

    await browser_logger.info(
        page,
        f"{func_name}: Detected PDF: {temp_file.name} ({temp_file.stat().st_size / 1e6:.1f} MB)",
    )

    # Save minimal metadata as a sidecar file next to the PDF
    if doi:
        import json

        metadata_file = temp_file.parent / f"{temp_file.name}.meta.json"
        metadata = {"doi": doi, "pdf_url": pdf_url, "pdf_file": temp_file.name}
        # Explicit encoding so the sidecar does not depend on the locale.
        with open(metadata_file, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

    await browser_logger.info(
        page, f"{func_name}: Manual download complete - saved in downloads/"
    )

    logger.info(f"{func_name}: PDF: {temp_file}")
    if doi:
        logger.info(f"{func_name}: DOI: {doi} (saved in {temp_file.name}.meta.json)")

    return temp_file
242
+
243
+
167
244
  # EOF
@@ -1,14 +1,8 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
- # Timestamp: "2025-10-16 01:47:39 (ywatanabe)"
4
- # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pipelines/ScholarPipelineBibTeX.py
5
- # ----------------------------------------
6
- from __future__ import annotations
7
- import os
3
+ # Timestamp: "2026-01-22 16:32:41 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex-code/src/scitex/scholar/pipelines/ScholarPipelineBibTeX.py
8
5
 
9
- __FILE__ = "./src/scitex/scholar/pipelines/ScholarPipelineBibTeX.py"
10
- __DIR__ = os.path.dirname(__FILE__)
11
- # ----------------------------------------
12
6
 
13
7
  """
14
8
  Functionalities:
@@ -33,12 +27,12 @@ IO:
33
27
  - {input_bibtex}_processed.bib (enriched BibTeX with download status)
34
28
  """
35
29
 
36
- """Imports"""
37
- import argparse
38
30
  import asyncio
39
31
  from pathlib import Path
40
- from typing import Optional, Union
32
+ from typing import Optional
33
+ from typing import Union
41
34
 
35
+ import scitex as stx
42
36
  from scitex import logging
43
37
  from scitex.scholar.core import Papers
44
38
  from scitex.scholar.pipelines.ScholarPipelineParallel import (
@@ -49,8 +43,6 @@ from scitex.scholar.storage import BibTeXHandler
49
43
  logger = logging.getLogger(__name__)
50
44
 
51
45
  """Functions & Classes"""
52
-
53
-
54
46
  class ScholarPipelineBibTeX:
55
47
  """Processes BibTeX files through parallel paper acquisition pipeline"""
56
48
 
@@ -96,7 +88,8 @@ class ScholarPipelineBibTeX:
96
88
  project: Project name for symlinking (optional)
97
89
  output_bibtex_path: Path to save enriched BibTeX (optional, defaults to {input}_processed.bib)
98
90
 
99
- Returns:
91
+ Returns
92
+ -------
100
93
  Papers collection with processed papers
101
94
  """
102
95
  bibtex_path = Path(bibtex_path)
@@ -145,7 +138,9 @@ class ScholarPipelineBibTeX:
145
138
  logger.success(
146
139
  f"{self.name}: Processed {len(processed_papers)}/{len(papers)} papers"
147
140
  )
148
- logger.success(f"{self.name}: Saved enriched BibTeX: {output_bibtex_path}")
141
+ logger.success(
142
+ f"{self.name}: Saved enriched BibTeX: {output_bibtex_path}"
143
+ )
149
144
 
150
145
  # Update project bibliography if project specified
151
146
  if project:
@@ -161,7 +156,9 @@ class ScholarPipelineBibTeX:
161
156
  bibtex_files=[bibtex_path, output_bibtex_path],
162
157
  )
163
158
 
164
- logger.success(f"{self.name}: Updated project bibliography: {project}")
159
+ logger.success(
160
+ f"{self.name}: Updated project bibliography: {project}"
161
+ )
165
162
  except Exception as e:
166
163
  logger.warning(f"Failed to update bibliography: {e}")
167
164
 
@@ -180,7 +177,8 @@ class ScholarPipelineBibTeX:
180
177
  project: Project name for symlinking (optional)
181
178
  output_bibtex_path: Path to save enriched BibTeX (optional)
182
179
 
183
- Returns:
180
+ Returns
181
+ -------
184
182
  Papers collection with processed papers
185
183
  """
186
184
  logger.info(f"{self.name}: Processing BibTeX text content")
@@ -193,7 +191,9 @@ class ScholarPipelineBibTeX:
193
191
  logger.warning(f"{self.name}: No papers found in BibTeX text")
194
192
  return Papers([], project=project)
195
193
 
196
- logger.info(f"{self.name}: Loaded {len(papers)} papers from BibTeX text")
194
+ logger.info(
195
+ f"{self.name}: Loaded {len(papers)} papers from BibTeX text"
196
+ )
197
197
 
198
198
  # Step 2: Process papers in parallel
199
199
  papers_collection = Papers(papers, project=project)
@@ -213,7 +213,9 @@ class ScholarPipelineBibTeX:
213
213
  processed_collection,
214
214
  output_path=output_bibtex_path,
215
215
  )
216
- logger.success(f"{self.name}: Saved enriched BibTeX: {output_bibtex_path}")
216
+ logger.success(
217
+ f"{self.name}: Saved enriched BibTeX: {output_bibtex_path}"
218
+ )
217
219
 
218
220
  logger.success(
219
221
  f"{self.name}: Processed {len(processed_papers)}/{len(papers)} papers"
@@ -222,134 +224,84 @@ class ScholarPipelineBibTeX:
222
224
  return processed_collection
223
225
 
224
226
 
225
- def main(args):
226
- """Run BibTeX pipeline"""
227
-
228
- if not args.bibtex:
227
+ @stx.session
228
+ def main(
229
+ bibtex: str = None,
230
+ project: str = None,
231
+ output: str = None,
232
+ num_workers: int = 4,
233
+ browser_mode: str = "stealth",
234
+ chrome_profile: str = "system",
235
+ CONFIG=stx.INJECTED,
236
+ logger=stx.INJECTED,
237
+ ) -> int:
238
+ """Process BibTeX files through parallel paper acquisition pipeline.
239
+
240
+ Parameters
241
+ ----------
242
+ bibtex : str
243
+ Path to BibTeX file (required)
244
+ project : str
245
+ Project name for symlinking (optional)
246
+ output : str
247
+ Output BibTeX path (default: {input}_processed.bib)
248
+ num_workers : int
249
+ Number of parallel workers (default: 4)
250
+ browser_mode : str
251
+ Browser mode: 'stealth' or 'interactive' (default: stealth)
252
+ chrome_profile : str
253
+ Base Chrome profile name to sync from (default: system)
254
+
255
+ Returns
256
+ -------
257
+ int
258
+ Exit status code (0 for success)
259
+ """
260
+ if not bibtex:
229
261
  logger.error("No BibTeX file provided. Use --bibtex")
230
262
  return 1
231
263
 
232
- bibtex_path = Path(args.bibtex)
264
+ bibtex_path = Path(bibtex)
233
265
  if not bibtex_path.exists():
234
266
  logger.error(f"BibTeX file not found: {bibtex_path}")
235
267
  return 1
236
268
 
237
269
  logger.info(f"Processing BibTeX file: {bibtex_path}")
238
- logger.info(f"Workers: {args.num_workers}")
239
- logger.info(f"Project: {args.project or 'None'}")
270
+ logger.info(f"Workers: {num_workers}")
271
+ logger.info(f"Project: {project or 'None'}")
240
272
 
241
273
  # Create BibTeX pipeline
242
274
  bibtex_pipeline = ScholarPipelineBibTeX(
243
- num_workers=args.num_workers,
244
- browser_mode=args.browser_mode,
245
- base_chrome_profile=args.chrome_profile,
275
+ num_workers=num_workers,
276
+ browser_mode=browser_mode,
277
+ base_chrome_profile=chrome_profile,
246
278
  )
247
279
 
248
280
  # Run pipeline
249
281
  papers = asyncio.run(
250
282
  bibtex_pipeline.process_bibtex_file_async(
251
283
  bibtex_path=bibtex_path,
252
- project=args.project,
253
- output_bibtex_path=args.output,
284
+ project=project,
285
+ output_bibtex_path=output,
254
286
  )
255
287
  )
256
288
 
257
- logger.success(f"BibTeX processing complete: {len(papers)} papers processed")
258
- return 0
259
-
260
-
261
- def parse_args() -> argparse.Namespace:
262
- """Parse command line arguments."""
263
- parser = argparse.ArgumentParser(
264
- description="Process BibTeX files through parallel paper acquisition pipeline"
265
- )
266
- parser.add_argument(
267
- "--bibtex",
268
- type=str,
269
- required=True,
270
- help="Path to BibTeX file",
271
- )
272
- parser.add_argument(
273
- "--project",
274
- type=str,
275
- default=None,
276
- help="Project name for symlinking (optional)",
277
- )
278
- parser.add_argument(
279
- "--output",
280
- type=str,
281
- default=None,
282
- help="Output BibTeX path (default: {input}_processed.bib)",
283
- )
284
- parser.add_argument(
285
- "--num-workers",
286
- type=int,
287
- default=4,
288
- help="Number of parallel workers (default: 4)",
289
- )
290
- parser.add_argument(
291
- "--browser-mode",
292
- type=str,
293
- choices=["stealth", "interactive"],
294
- default="stealth",
295
- help="Browser mode (default: stealth)",
296
- )
297
- parser.add_argument(
298
- "--chrome-profile",
299
- type=str,
300
- default="system",
301
- help="Base Chrome profile name to sync from (default: system)",
302
- )
303
- args = parser.parse_args()
304
- return args
305
-
306
-
307
- def run_main() -> None:
308
- """Initialize scitex framework, run main function, and cleanup."""
309
- global CONFIG, CC, sys, plt, rng
310
-
311
- import sys
312
-
313
- import matplotlib.pyplot as plt
314
-
315
- import scitex as stx
316
-
317
- args = parse_args()
318
-
319
- CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
320
- sys,
321
- plt,
322
- args=args,
323
- file=__FILE__,
324
- sdir_suffix=None,
325
- verbose=False,
326
- agg=True,
327
- )
328
-
329
- exit_status = main(args)
330
-
331
- stx.session.close(
332
- CONFIG,
333
- verbose=False,
334
- notify=False,
335
- message="",
336
- exit_status=exit_status,
289
+ logger.success(
290
+ f"BibTeX processing complete: {len(papers)} papers processed"
337
291
  )
292
+ return 0
338
293
 
339
294
 
340
295
  if __name__ == "__main__":
341
- run_main()
296
+ main()
342
297
 
343
298
  """
344
- Usage:
345
-
346
- # Process BibTeX file with 8 workers
347
- python -m scitex.scholar.pipelines.ScholarPipelineBibTeX \
348
- --bibtex ./data/scholar/bib_files/neurovista.bib \
349
- --project neurovista \
350
- --num-workers 8 \
351
- --chrome-profile system \
352
- --browser-mode stealth
299
+ python -m scitex.scholar.pipelines.ScholarPipelineBibTeX \
300
+ --bibtex ./data/scholar/bib_files/neurovista.bib \
301
+ --project neurovista \
302
+ --num-workers 8 \
303
+ --chrome-profile system \
304
+ --browser-mode interactive
353
305
  """
354
306
 
355
307
  # EOF