scitex 2.14.0__py3-none-any.whl → 2.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300)
  1. scitex/__init__.py +71 -17
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +210 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +244 -0
  16. scitex/_mcp_tools/template.py +24 -0
  17. scitex/_mcp_tools/writer.py +21 -204
  18. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  19. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  20. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  21. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  22. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  23. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  24. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  25. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  26. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  27. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  28. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
  29. scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
  30. scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
  31. scitex/audio/README.md +40 -36
  32. scitex/audio/__init__.py +129 -61
  33. scitex/audio/_branding.py +185 -0
  34. scitex/audio/_mcp/__init__.py +32 -0
  35. scitex/audio/_mcp/handlers.py +59 -6
  36. scitex/audio/_mcp/speak_handlers.py +238 -0
  37. scitex/audio/_relay.py +225 -0
  38. scitex/audio/_tts.py +18 -10
  39. scitex/audio/engines/base.py +17 -10
  40. scitex/audio/engines/elevenlabs_engine.py +7 -2
  41. scitex/audio/mcp_server.py +228 -75
  42. scitex/canvas/README.md +1 -1
  43. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  44. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  45. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  46. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  47. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  48. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  49. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  50. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  51. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  52. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  53. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  54. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  55. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  56. scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
  57. scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
  58. scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
  59. scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
  60. scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
  61. scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
  62. scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
  63. scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
  64. scitex/canvas/editor/flask_editor/_core.py +25 -1684
  65. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  66. scitex/cli/__init__.py +38 -43
  67. scitex/cli/audio.py +76 -27
  68. scitex/cli/capture.py +13 -20
  69. scitex/cli/introspect.py +481 -0
  70. scitex/cli/main.py +200 -109
  71. scitex/cli/mcp.py +60 -34
  72. scitex/cli/plt.py +357 -0
  73. scitex/cli/repro.py +15 -8
  74. scitex/cli/resource.py +15 -8
  75. scitex/cli/scholar/__init__.py +23 -8
  76. scitex/cli/scholar/_crossref_scitex.py +296 -0
  77. scitex/cli/scholar/_fetch.py +25 -3
  78. scitex/cli/social.py +314 -0
  79. scitex/cli/stats.py +15 -8
  80. scitex/cli/template.py +129 -12
  81. scitex/cli/tex.py +15 -8
  82. scitex/cli/writer.py +132 -8
  83. scitex/cloud/__init__.py +41 -2
  84. scitex/config/README.md +1 -1
  85. scitex/config/__init__.py +16 -2
  86. scitex/config/_env_registry.py +256 -0
  87. scitex/context/__init__.py +22 -0
  88. scitex/dev/__init__.py +20 -1
  89. scitex/diagram/__init__.py +42 -19
  90. scitex/diagram/mcp_server.py +13 -125
  91. scitex/gen/__init__.py +50 -14
  92. scitex/gen/_list_packages.py +4 -4
  93. scitex/introspect/__init__.py +82 -0
  94. scitex/introspect/_call_graph.py +303 -0
  95. scitex/introspect/_class_hierarchy.py +163 -0
  96. scitex/introspect/_core.py +41 -0
  97. scitex/introspect/_docstring.py +131 -0
  98. scitex/introspect/_examples.py +113 -0
  99. scitex/introspect/_imports.py +271 -0
  100. scitex/{gen/_inspect_module.py → introspect/_list_api.py} +43 -54
  101. scitex/introspect/_mcp/__init__.py +41 -0
  102. scitex/introspect/_mcp/handlers.py +233 -0
  103. scitex/introspect/_members.py +155 -0
  104. scitex/introspect/_resolve.py +89 -0
  105. scitex/introspect/_signature.py +131 -0
  106. scitex/introspect/_source.py +80 -0
  107. scitex/introspect/_type_hints.py +172 -0
  108. scitex/io/_save.py +1 -2
  109. scitex/io/bundle/README.md +1 -1
  110. scitex/logging/_formatters.py +19 -9
  111. scitex/mcp_server.py +98 -5
  112. scitex/os/__init__.py +4 -0
  113. scitex/{gen → os}/_check_host.py +4 -5
  114. scitex/plt/__init__.py +245 -550
  115. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  116. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  117. scitex/plt/gallery/README.md +1 -1
  118. scitex/plt/utils/_hitmap/__init__.py +82 -0
  119. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  120. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  121. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  122. scitex/plt/utils/_hitmap/_constants.py +40 -0
  123. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  124. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  125. scitex/plt/utils/_hitmap/_query.py +113 -0
  126. scitex/plt/utils/_hitmap.py +46 -1616
  127. scitex/plt/utils/_metadata/__init__.py +80 -0
  128. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  129. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  130. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  131. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  132. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  133. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  134. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  135. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  136. scitex/plt/utils/_metadata/_csv.py +416 -0
  137. scitex/plt/utils/_metadata/_detect.py +225 -0
  138. scitex/plt/utils/_metadata/_legend.py +127 -0
  139. scitex/plt/utils/_metadata/_rounding.py +117 -0
  140. scitex/plt/utils/_metadata/_verification.py +202 -0
  141. scitex/schema/README.md +1 -1
  142. scitex/scholar/__init__.py +8 -0
  143. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  144. scitex/scholar/core/Scholar.py +63 -1700
  145. scitex/scholar/core/_mixins/__init__.py +36 -0
  146. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  147. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  148. scitex/scholar/core/_mixins/_loaders.py +103 -0
  149. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  150. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  151. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  152. scitex/scholar/core/_mixins/_savers.py +69 -0
  153. scitex/scholar/core/_mixins/_search.py +103 -0
  154. scitex/scholar/core/_mixins/_services.py +88 -0
  155. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  156. scitex/scholar/crossref_scitex.py +367 -0
  157. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  158. scitex/scholar/examples/00_run_all.sh +120 -0
  159. scitex/scholar/jobs/_executors.py +27 -3
  160. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  161. scitex/scholar/pdf_download/_cli.py +154 -0
  162. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  163. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  164. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  165. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  166. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  167. scitex/scholar/pipelines/_single_steps.py +71 -36
  168. scitex/scholar/storage/_LibraryManager.py +97 -1695
  169. scitex/scholar/storage/_mixins/__init__.py +30 -0
  170. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  171. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  172. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  173. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  174. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  175. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  176. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  177. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  178. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  179. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  180. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  181. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  182. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  183. scitex/security/README.md +3 -3
  184. scitex/session/README.md +1 -1
  185. scitex/session/__init__.py +26 -7
  186. scitex/session/_decorator.py +1 -1
  187. scitex/sh/README.md +1 -1
  188. scitex/sh/__init__.py +7 -4
  189. scitex/social/__init__.py +155 -0
  190. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  191. scitex/stats/_mcp/_handlers/__init__.py +31 -0
  192. scitex/stats/_mcp/_handlers/_corrections.py +113 -0
  193. scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
  194. scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
  195. scitex/stats/_mcp/_handlers/_format.py +94 -0
  196. scitex/stats/_mcp/_handlers/_normality.py +110 -0
  197. scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
  198. scitex/stats/_mcp/_handlers/_power.py +247 -0
  199. scitex/stats/_mcp/_handlers/_recommend.py +102 -0
  200. scitex/stats/_mcp/_handlers/_run_test.py +279 -0
  201. scitex/stats/_mcp/_handlers/_stars.py +48 -0
  202. scitex/stats/_mcp/handlers.py +19 -1171
  203. scitex/stats/auto/_stat_style.py +175 -0
  204. scitex/stats/auto/_style_definitions.py +411 -0
  205. scitex/stats/auto/_styles.py +22 -620
  206. scitex/stats/descriptive/__init__.py +11 -8
  207. scitex/stats/descriptive/_ci.py +39 -0
  208. scitex/stats/power/_power.py +15 -4
  209. scitex/str/__init__.py +2 -1
  210. scitex/str/_title_case.py +63 -0
  211. scitex/template/README.md +1 -1
  212. scitex/template/__init__.py +25 -10
  213. scitex/template/_code_templates.py +147 -0
  214. scitex/template/_mcp/handlers.py +81 -0
  215. scitex/template/_mcp/tool_schemas.py +55 -0
  216. scitex/template/_templates/__init__.py +51 -0
  217. scitex/template/_templates/audio.py +233 -0
  218. scitex/template/_templates/canvas.py +312 -0
  219. scitex/template/_templates/capture.py +268 -0
  220. scitex/template/_templates/config.py +43 -0
  221. scitex/template/_templates/diagram.py +294 -0
  222. scitex/template/_templates/io.py +107 -0
  223. scitex/template/_templates/module.py +53 -0
  224. scitex/template/_templates/plt.py +202 -0
  225. scitex/template/_templates/scholar.py +267 -0
  226. scitex/template/_templates/session.py +130 -0
  227. scitex/template/_templates/session_minimal.py +43 -0
  228. scitex/template/_templates/session_plot.py +67 -0
  229. scitex/template/_templates/session_stats.py +77 -0
  230. scitex/template/_templates/stats.py +323 -0
  231. scitex/template/_templates/writer.py +296 -0
  232. scitex/template/clone_writer_directory.py +5 -5
  233. scitex/ui/_backends/_email.py +10 -2
  234. scitex/ui/_backends/_webhook.py +5 -1
  235. scitex/web/_search_pubmed.py +10 -6
  236. scitex/writer/README.md +1 -1
  237. scitex/writer/_mcp/handlers.py +11 -744
  238. scitex/writer/_mcp/tool_schemas.py +5 -335
  239. scitex-2.15.2.dist-info/METADATA +648 -0
  240. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/RECORD +246 -150
  241. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  242. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  243. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  244. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  245. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  246. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  247. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  248. scitex/diagram/_compile.py +0 -312
  249. scitex/diagram/_diagram.py +0 -355
  250. scitex/diagram/_mcp/__init__.py +0 -4
  251. scitex/diagram/_mcp/handlers.py +0 -400
  252. scitex/diagram/_mcp/tool_schemas.py +0 -157
  253. scitex/diagram/_presets.py +0 -173
  254. scitex/diagram/_schema.py +0 -182
  255. scitex/diagram/_split.py +0 -278
  256. scitex/gen/_ci.py +0 -12
  257. scitex/gen/_title_case.py +0 -89
  258. scitex/plt/_mcp/__init__.py +0 -4
  259. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  260. scitex/plt/_mcp/_handlers_figure.py +0 -195
  261. scitex/plt/_mcp/_handlers_plot.py +0 -252
  262. scitex/plt/_mcp/_handlers_style.py +0 -219
  263. scitex/plt/_mcp/handlers.py +0 -74
  264. scitex/plt/_mcp/tool_schemas.py +0 -497
  265. scitex/plt/mcp_server.py +0 -231
  266. scitex/scholar/data/.gitkeep +0 -0
  267. scitex/scholar/data/README.md +0 -44
  268. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  269. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  270. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  271. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  272. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  273. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  274. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  275. scitex/scholar/data/bib_files/pac.bib +0 -698
  276. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  277. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  278. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  279. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  280. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  281. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  282. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  283. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  284. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  285. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  286. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  287. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  288. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  289. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  290. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  291. scitex/scholar/data/impact_factor.db +0 -0
  292. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  293. scitex/scholar/examples/dev.py +0 -38
  294. scitex-2.14.0.dist-info/METADATA +0 -1238
  295. /scitex/{gen → context}/_detect_environment.py +0 -0
  296. /scitex/{gen → context}/_get_notebook_path.py +0 -0
  297. /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
  298. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/WHEEL +0 -0
  299. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/entry_points.txt +0 -0
  300. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,15 +1,6 @@
1
1
  #!/usr/bin/env python3
2
- # Timestamp: "2025-10-13 06:13:41 (ywatanabe)"
3
- # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pipelines/ScholarPipelineParallel.py
4
- # ----------------------------------------
5
- from __future__ import annotations
6
-
7
- import os
8
-
9
- __FILE__ = "./src/scitex/scholar/pipelines/ScholarPipelineParallel.py"
10
- __DIR__ = os.path.dirname(__FILE__)
11
- # ----------------------------------------
12
-
2
+ # Timestamp: "2026-01-22 (ywatanabe)"
3
+ # File: src/scitex/scholar/pipelines/ScholarPipelineParallel.py
13
4
  """
14
5
  Functionalities:
15
6
  - Orchestrates parallel paper acquisition using multiple browser profiles
@@ -33,11 +24,12 @@ IO:
33
24
  - library/{project}/{paper_id} -> ../MASTER/{paper_id} (multiple symlinks)
34
25
  """
35
26
 
36
- """Imports"""
37
- import argparse
27
+ from __future__ import annotations
28
+
38
29
  import asyncio
39
30
  from typing import List, Optional
40
31
 
32
+ import scitex as stx
41
33
  from scitex import logging
42
34
  from scitex.browser.core import ChromeProfileManager
43
35
  from scitex.scholar.auth import ScholarAuthManager
@@ -79,7 +71,8 @@ class ScholarPipelineParallel:
79
71
  async def _verify_authentication_async(self) -> bool:
80
72
  """Pre-verify authentication once before spawning workers.
81
73
 
82
- Returns:
74
+ Returns
75
+ -------
83
76
  True if authenticated, False otherwise
84
77
  """
85
78
  logger.info(f"{self.name}: Verifying authentication...")
@@ -110,7 +103,8 @@ class ScholarPipelineParallel:
110
103
  Args:
111
104
  num_workers: Number of workers to prepare (defaults to self.num_workers)
112
105
 
113
- Returns:
106
+ Returns
107
+ -------
114
108
  List of worker profile names
115
109
  """
116
110
  workers_to_prepare = (
@@ -157,7 +151,8 @@ class ScholarPipelineParallel:
157
151
  worker_id: Worker ID for logging
158
152
  worker_profile: Chrome profile name for this worker
159
153
 
160
- Returns:
154
+ Returns
155
+ -------
161
156
  Paper object if successful, None otherwise
162
157
  """
163
158
  logger.info(
@@ -200,7 +195,8 @@ class ScholarPipelineParallel:
200
195
  doi_or_title_list: List of DOI or title strings
201
196
  project: Project name for symlinking (optional)
202
197
 
203
- Returns:
198
+ Returns
199
+ -------
204
200
  List of successfully processed Paper objects
205
201
  """
206
202
  if not doi_or_title_list:
@@ -291,7 +287,8 @@ class ScholarPipelineParallel:
291
287
  papers: Papers collection
292
288
  project: Project name for symlinking (optional, uses papers.project if None)
293
289
 
294
- Returns:
290
+ Returns
291
+ -------
295
292
  List of successfully processed Paper objects
296
293
  """
297
294
  # Extract DOIs or titles from papers
@@ -314,34 +311,64 @@ class ScholarPipelineParallel:
314
311
  )
315
312
 
316
313
 
317
- def main(args):
318
- """Run parallel pipeline"""
319
-
314
+ @stx.session
315
+ def main(
316
+ dois: str = None,
317
+ titles: str = None,
318
+ project: str = None,
319
+ num_workers: int = 4,
320
+ browser_mode: str = "stealth",
321
+ chrome_profile: str = "system",
322
+ CONFIG=stx.INJECTED,
323
+ logger=stx.INJECTED,
324
+ ) -> int:
325
+ """Orchestrate parallel paper acquisition pipeline.
326
+
327
+ Parameters
328
+ ----------
329
+ dois : str
330
+ Comma-separated DOIs (e.g., '10.1038/...,10.1016/...')
331
+ titles : str
332
+ Comma-separated paper titles
333
+ project : str
334
+ Project name for symlinking (optional)
335
+ num_workers : int
336
+ Number of parallel workers (default: 4)
337
+ browser_mode : str
338
+ Browser mode: 'stealth' or 'interactive' (default: stealth)
339
+ chrome_profile : str
340
+ Base Chrome profile name to sync from (default: system)
341
+
342
+ Returns
343
+ -------
344
+ int
345
+ Exit status code (0 for success)
346
+ """
320
347
  # Parse input queries
321
348
  queries = []
322
- if args.dois:
323
- queries.extend(args.dois.split(","))
324
- if args.titles:
325
- queries.extend(args.titles.split(","))
349
+ if dois:
350
+ queries.extend(dois.split(","))
351
+ if titles:
352
+ queries.extend(titles.split(","))
326
353
 
327
354
  if not queries:
328
355
  logger.error("No queries provided. Use --dois or --titles")
329
356
  return 1
330
357
 
331
- logger.info(f"Processing {len(queries)} queries with {args.num_workers} workers")
358
+ logger.info(f"Processing {len(queries)} queries with {num_workers} workers")
332
359
 
333
360
  # Create parallel pipeline
334
361
  parallel_pipeline = ScholarPipelineParallel(
335
- num_workers=args.num_workers,
336
- browser_mode=args.browser_mode,
337
- base_chrome_profile=args.chrome_profile,
362
+ num_workers=num_workers,
363
+ browser_mode=browser_mode,
364
+ base_chrome_profile=chrome_profile,
338
365
  )
339
366
 
340
367
  # Run pipeline
341
368
  papers = asyncio.run(
342
369
  parallel_pipeline.process_papers_from_list_async(
343
370
  doi_or_title_list=queries,
344
- project=args.project,
371
+ project=project,
345
372
  )
346
373
  )
347
374
 
@@ -349,114 +376,29 @@ def main(args):
349
376
  return 0
350
377
 
351
378
 
352
- def parse_args() -> argparse.Namespace:
353
- """Parse command line arguments."""
354
- parser = argparse.ArgumentParser(
355
- description="Orchestrate parallel paper acquisition pipeline"
356
- )
357
- parser.add_argument(
358
- "--dois",
359
- type=str,
360
- default=None,
361
- help="Comma-separated DOIs (e.g., '10.1038/...,10.1016/...')",
362
- )
363
- parser.add_argument(
364
- "--titles",
365
- type=str,
366
- default=None,
367
- help="Comma-separated paper titles",
368
- )
369
- parser.add_argument(
370
- "--project",
371
- type=str,
372
- default=None,
373
- help="Project name for symlinking (optional)",
374
- )
375
- parser.add_argument(
376
- "--num-workers",
377
- type=int,
378
- default=4,
379
- help="Number of parallel workers (default: 4)",
380
- )
381
- parser.add_argument(
382
- "--browser-mode",
383
- type=str,
384
- choices=["stealth", "interactive"],
385
- default="stealth",
386
- help="Browser mode (default: stealth)",
387
- )
388
- parser.add_argument(
389
- "--chrome-profile",
390
- type=str,
391
- default="system",
392
- help="Base Chrome profile name to sync from (default: system)",
393
- )
394
- args = parser.parse_args()
395
- return args
396
-
397
-
398
- def run_main() -> None:
399
- """Initialize scitex framework, run main function, and cleanup."""
400
- global CONFIG, CC, sys, plt, rng
401
-
402
- import sys
403
-
404
- import matplotlib.pyplot as plt
405
-
406
- import scitex as stx
407
-
408
- args = parse_args()
409
-
410
- CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
411
- sys,
412
- plt,
413
- args=args,
414
- file=__FILE__,
415
- sdir_suffix=None,
416
- verbose=False,
417
- agg=True,
418
- )
419
-
420
- exit_status = main(args)
421
-
422
- stx.session.close(
423
- CONFIG,
424
- verbose=False,
425
- notify=False,
426
- message="",
427
- exit_status=exit_status,
428
- )
429
-
430
-
431
379
  if __name__ == "__main__":
432
- run_main()
433
-
434
- """
435
- Usage:
436
- # With DOIs (4 workers)
437
- python -m scitex.scholar.pipelines.ScholarPipelineParallel \
438
- --dois "10.1212/wnl.0000000000200348,10.1038/s41598-017-02626-y" \
439
- --project neurovista \
440
- --num-workers 4 \
441
- --browser-mode stealth \
442
- --chrome-profile system
443
-
444
- # With titles (2 workers)
445
- python -m scitex.scholar.pipelines.ScholarPipelineParallel \
446
- --titles "Attention Is All You Need,BERT: Pre-training of Deep Bidirectional Transformers" \
447
- --project transformers \
448
- --num-workers 2 \
449
- --browser-mode stealth \
450
- --chrome-profile system
451
-
452
- # Mixed DOIs and titles (8 workers)
453
- python -m scitex.scholar.pipelines.ScholarPipelineParallel \
454
- --dois "10.1038/s41593-025-01990-7" \
455
- --titles "Neural State Monitoring in the Treatment of Epilepsy" \
456
- --project epilepsy \
457
- --num-workers 8 \
458
- --browser-mode stealth \
459
- --chrome-profile system
460
- """
380
+ main()
381
+
382
+ # Usage:
383
+ # # With DOIs (4 workers)
384
+ # python -m scitex.scholar.pipelines.ScholarPipelineParallel \
385
+ # --dois "10.1212/wnl.0000000000200348,10.1038/s41598-017-02626-y" \
386
+ # --project neurovista \
387
+ # --num-workers 4 \
388
+ # --browser-mode stealth \
389
+ # --chrome-profile system
390
+ #
391
+ # # With titles (2 workers)
392
+ # python -m scitex.scholar.pipelines.ScholarPipelineParallel \
393
+ # --titles "Attention Is All You Need,BERT: Pre-training" \
394
+ # --project transformers \
395
+ # --num-workers 2
396
+ #
397
+ # # Mixed DOIs and titles (8 workers)
398
+ # python -m scitex.scholar.pipelines.ScholarPipelineParallel \
399
+ # --dois "10.1038/s41593-025-01990-7" \
400
+ # --titles "Neural State Monitoring in the Treatment of Epilepsy" \
401
+ # --project epilepsy \
402
+ # --num-workers 8
461
403
 
462
404
  # EOF
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- # Timestamp: "2026-01-14 (ywatanabe)"
2
+ # Timestamp: "2026-01-22 (ywatanabe)"
3
3
  # File: src/scitex/scholar/pipelines/ScholarPipelineSingle.py
4
4
  """
5
5
  Single paper acquisition pipeline orchestrator.
@@ -21,10 +21,10 @@ IO:
21
21
 
22
22
  from __future__ import annotations
23
23
 
24
- import argparse
25
24
  import asyncio
26
25
  from typing import Optional
27
26
 
27
+ import scitex as stx
28
28
  from scitex import logging
29
29
  from scitex.scholar.storage import PaperIO
30
30
 
@@ -90,10 +90,10 @@ class ScholarPipelineSingle(PipelineStepsMixin, PipelineHelpersMixin):
90
90
  )
91
91
  if context:
92
92
  await self._step_06_find_pdf_urls(
93
- paper, io, context, auth_gateway, force
93
+ paper, io, context, auth_gateway, force, browser_manager
94
94
  )
95
95
  await self._step_07_download_pdf(
96
- paper, io, context, auth_gateway, force
96
+ paper, io, context, auth_gateway, force, browser_manager
97
97
  )
98
98
  if browser_manager:
99
99
  await browser_manager.close()
@@ -108,75 +108,55 @@ class ScholarPipelineSingle(PipelineStepsMixin, PipelineHelpersMixin):
108
108
  return paper, symlink_path
109
109
 
110
110
 
111
- def main(args):
112
- """Run single paper pipeline."""
111
+ @stx.session
112
+ def main(
113
+ doi_or_title: str = None,
114
+ project: str = None,
115
+ browser_mode: str = "stealth",
116
+ chrome_profile: str = "system",
117
+ force: bool = False,
118
+ CONFIG=stx.INJECTED,
119
+ logger=stx.INJECTED,
120
+ ) -> int:
121
+ """Orchestrate full paper acquisition pipeline.
122
+
123
+ Parameters
124
+ ----------
125
+ doi_or_title : str
126
+ DOI or paper title (required)
127
+ project : str
128
+ Project name for symlinking (optional)
129
+ browser_mode : str
130
+ Browser mode: 'stealth' or 'interactive' (default: stealth)
131
+ chrome_profile : str
132
+ Chrome profile name (default: system)
133
+ force : bool
134
+ Force fresh processing (default: False)
135
+
136
+ Returns
137
+ -------
138
+ int
139
+ Exit status code (0 for success)
140
+ """
141
+ if not doi_or_title:
142
+ logger.error("--doi-or-title is required")
143
+ return 1
144
+
113
145
  pipeline = ScholarPipelineSingle(
114
- browser_mode=args.browser_mode, chrome_profile=args.chrome_profile
146
+ browser_mode=browser_mode, chrome_profile=chrome_profile
115
147
  )
116
148
  paper, symlink_path = asyncio.run(
117
149
  pipeline.process_single_paper(
118
- doi_or_title=args.doi_or_title,
119
- project=args.project,
120
- force=args.force,
150
+ doi_or_title=doi_or_title,
151
+ project=project,
152
+ force=force,
121
153
  )
122
154
  )
123
155
  return 0
124
156
 
125
157
 
126
- def parse_args() -> argparse.Namespace:
127
- """Parse command line arguments."""
128
- parser = argparse.ArgumentParser(
129
- description="Orchestrate full paper acquisition pipeline"
130
- )
131
- parser.add_argument(
132
- "--doi-or-title", type=str, required=True, help="DOI or paper title"
133
- )
134
- parser.add_argument(
135
- "--project", type=str, default=None, help="Project name for symlinking"
136
- )
137
- parser.add_argument(
138
- "--browser-mode",
139
- type=str,
140
- choices=["stealth", "interactive"],
141
- default="stealth",
142
- help="Browser mode (default: stealth)",
143
- )
144
- parser.add_argument(
145
- "--chrome-profile",
146
- type=str,
147
- required=True,
148
- help="Chrome profile name (default: system)",
149
- )
150
- parser.add_argument(
151
- "--force",
152
- "-f",
153
- action="store_true",
154
- default=False,
155
- help="Force fresh processing",
156
- )
157
- return parser.parse_args()
158
-
159
-
160
- def run_main() -> None:
161
- """Initialize scitex framework, run main function, and cleanup."""
162
- import sys
163
-
164
- import matplotlib.pyplot as plt
165
-
166
- import scitex as stx
167
-
168
- args = parse_args()
169
- CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
170
- sys, plt, args=args, file=__file__, sdir_suffix=None, verbose=False, agg=True
171
- )
172
- exit_status = main(args)
173
- stx.session.close(
174
- CONFIG, verbose=False, notify=False, message="", exit_status=exit_status
175
- )
176
-
177
-
178
158
  if __name__ == "__main__":
179
- run_main()
159
+ main()
180
160
 
181
161
  # Usage:
182
162
  # python -m scitex.scholar.pipelines.ScholarPipelineSingle \
@@ -115,7 +115,9 @@ class PipelineStepsMixin:
115
115
  )
116
116
  return browser_manager, context, auth_gateway
117
117
 
118
- async def _step_06_find_pdf_urls(self, paper, io, context, auth_gateway, force):
118
+ async def _step_06_find_pdf_urls(
119
+ self, paper, io, context, auth_gateway, force, browser_manager=None
120
+ ):
119
121
  if not paper.metadata.url.pdfs or force:
120
122
  logger.info(f"{self.name}: Finding PDF URLs...")
121
123
  try:
@@ -127,6 +129,9 @@ class PipelineStepsMixin:
127
129
  )
128
130
  except Exception as e:
129
131
  logger.warning(f"{self.name}: Auth gateway failed: {e}")
132
+ await self._capture_screenshot(
133
+ browser_manager, context, io, "auth_gateway_failed"
134
+ )
130
135
  publisher_url = paper.metadata.id.doi
131
136
  from scitex.scholar import ScholarURLFinder
132
137
 
@@ -135,23 +140,34 @@ class PipelineStepsMixin:
135
140
  paper.metadata.url.pdfs = urls
136
141
  paper.metadata.url.pdfs_engines = ["ScholarURLFinder"]
137
142
  io.save_metadata()
143
+ if not urls:
144
+ await self._capture_screenshot(
145
+ browser_manager, context, io, "no_pdf_urls_found"
146
+ )
138
147
  logger.info(f"{self.name}: Found {len(urls)} PDF URL(s)")
139
148
  else:
140
149
  logger.info(f"{self.name}: PDF URLs exist ({len(paper.metadata.url.pdfs)})")
141
150
 
142
- async def _step_07_download_pdf(self, paper, io, context, auth_gateway, force):
151
+ async def _step_07_download_pdf(
152
+ self, paper, io, context, auth_gateway, force, browser_manager=None
153
+ ):
143
154
  if (not io.has_pdf() or force) and paper.metadata.url.pdfs:
144
155
  logger.info(f"{self.name}: Downloading PDF...")
145
156
  from scitex.scholar.pdf_download import ScholarPDFDownloader
146
157
 
147
158
  downloader = ScholarPDFDownloader(context)
148
- downloaded, temp_path = await self._download_pdf_from_url(
159
+ downloaded, temp_path, download_method = await self._download_pdf_from_url(
149
160
  paper, io, context, auth_gateway, downloader
150
161
  )
151
162
  if downloaded:
152
- self._handle_downloaded_pdf(paper, io, downloaded, temp_path)
163
+ self._handle_downloaded_pdf(
164
+ paper, io, downloaded, temp_path, download_method
165
+ )
153
166
  else:
154
- self._check_manual_download(io)
167
+ await self._capture_screenshot(
168
+ browser_manager, context, io, "pdf_download_failed"
169
+ )
170
+ self._check_manual_download(io, paper)
155
171
  elif io.has_pdf():
156
172
  logger.info(f"{self.name}: PDF already exists, skipping download")
157
173
 
@@ -211,24 +227,35 @@ class PipelineStepsMixin:
211
227
  downloaded_file = await downloader.download_from_url(
212
228
  pdf_url, output_path=temp_pdf_path, doi=paper.metadata.id.doi
213
229
  )
214
- return downloaded_file, temp_pdf_path
215
-
216
- def _handle_downloaded_pdf(self, paper, io, downloaded_file, temp_pdf_path):
230
+ # Track download method based on context flags
231
+ download_method = "unknown"
232
+ if downloaded_file:
233
+ is_manual = getattr(context, "_scitex_is_manual_mode", False)
234
+ download_method = "manual_download" if is_manual else "automated"
235
+ return downloaded_file, temp_pdf_path, download_method
236
+
237
+ def _handle_downloaded_pdf(
238
+ self, paper, io, downloaded_file, temp_pdf_path, download_method="unknown"
239
+ ):
217
240
  import shutil
218
241
 
219
242
  if downloaded_file == temp_pdf_path and temp_pdf_path.exists():
220
243
  main_pdf = io.get_pdf_path()
221
244
  shutil.move(str(temp_pdf_path), str(main_pdf))
222
245
  paper.metadata.path.pdfs = [str(main_pdf)]
246
+ paper.metadata.path.pdfs_engines = [download_method]
223
247
  paper.container.pdf_size_bytes = main_pdf.stat().st_size
224
248
  io.save_metadata()
225
- logger.success(f"{self.name}: PDF downloaded to MASTER")
249
+ logger.success(
250
+ f"{self.name}: PDF downloaded to MASTER via {download_method}"
251
+ )
226
252
  else:
227
253
  io.save_pdf(downloaded_file)
254
+ paper.metadata.path.pdfs_engines = [download_method]
228
255
  io.save_metadata()
229
256
  logger.info(f"{self.name}: PDF saved ({str(downloaded_file)})")
230
257
 
231
- def _check_manual_download(self, io):
258
+ def _check_manual_download(self, io, paper=None):
232
259
  import time
233
260
 
234
261
  from scitex.scholar import ScholarConfig
@@ -249,6 +276,9 @@ class PipelineStepsMixin:
249
276
  latest_pdf = recent_pdfs[0][0]
250
277
  logger.info(f"{self.name}: Found recent PDF: {latest_pdf.name}")
251
278
  io.save_pdf(latest_pdf)
279
+ # Track as manual download
280
+ if paper:
281
+ paper.metadata.path.pdfs_engines = ["manual_download"]
252
282
  io.save_metadata()
253
283
  logger.success(f"{self.name}: Manual PDF saved to MASTER")
254
284
  else:
@@ -258,39 +288,44 @@ class PipelineStepsMixin:
258
288
  class PipelineHelpersMixin:
259
289
  """Mixin containing helper methods for single paper pipeline."""
260
290
 
291
+ async def _capture_screenshot(self, browser_manager, context, io, description):
292
+ """Capture screenshot for debugging when issues occur."""
293
+ if not browser_manager or not context:
294
+ return
295
+ try:
296
+ from datetime import datetime
297
+
298
+ screenshots_dir = io.paper_dir / "screenshots"
299
+ screenshots_dir.mkdir(parents=True, exist_ok=True)
300
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
301
+ screenshot_path = screenshots_dir / f"{timestamp}_{description}.png"
302
+ pages = context.pages
303
+ if pages:
304
+ page = pages[0]
305
+ await browser_manager.take_screenshot_async(
306
+ page, str(screenshot_path), full_page=True
307
+ )
308
+ logger.info(f"{self.name}: Screenshot saved: {screenshot_path.name}")
309
+ except Exception as e:
310
+ logger.debug(f"{self.name}: Screenshot capture failed: {e}")
311
+
261
312
  def _generate_paper_id(self, doi: str) -> str:
262
313
  """Generate 8-digit library ID from DOI."""
263
314
  return hashlib.md5(f"DOI:{doi}".encode()).hexdigest()[:8].upper()
264
315
 
265
316
  def _link_to_project(self, paper: Paper, project: str, io: PaperIO) -> Path:
266
- """Create human-readable symlink in project directory."""
267
- from scitex.scholar import ScholarConfig
317
+ """Create human-readable symlink in project directory using LibraryManager."""
318
+ from scitex.scholar.storage import LibraryManager
268
319
 
269
- config = ScholarConfig()
270
- project_dir = config.path_manager.get_library_project_dir(project)
271
- pdf_files = list(io.paper_dir.glob("*.pdf"))
272
- entry_name = config.path_manager.get_library_project_entry_dirname(
273
- n_pdfs=len(pdf_files),
274
- citation_count=paper.metadata.citation_count.total or 0,
275
- impact_factor=int(paper.metadata.publication.impact_factor or 0),
276
- year=paper.metadata.basic.year or 0,
277
- first_author=(
278
- paper.metadata.basic.authors[0].split()[-1]
279
- if paper.metadata.basic.authors
280
- else "Unknown"
281
- ),
282
- journal_name=(
283
- paper.metadata.publication.short_journal
284
- or paper.metadata.publication.journal
285
- or "Unknown"
286
- ),
320
+ library_manager = LibraryManager()
321
+ symlink_path = library_manager.update_symlink(
322
+ master_storage_path=io.paper_dir,
323
+ project=project,
287
324
  )
288
- symlink_path = project_dir / entry_name
289
- target_path = Path("../MASTER") / paper.container.library_id
290
- if symlink_path.exists() or symlink_path.is_symlink():
291
- symlink_path.unlink()
292
- symlink_path.symlink_to(target_path)
293
- logger.success(f"{self.name}: Created symlink: {project}/{entry_name}")
325
+ if symlink_path:
326
+ logger.success(
327
+ f"{self.name}: Created symlink: {project}/{symlink_path.name}"
328
+ )
294
329
  return symlink_path
295
330
 
296
331
  def _enrich_impact_factor(self, paper: Paper) -> None: