scitex 2.14.0__py3-none-any.whl → 2.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +71 -17
- scitex/_env_loader.py +156 -0
- scitex/_mcp_resources/__init__.py +37 -0
- scitex/_mcp_resources/_cheatsheet.py +135 -0
- scitex/_mcp_resources/_figrecipe.py +138 -0
- scitex/_mcp_resources/_formats.py +102 -0
- scitex/_mcp_resources/_modules.py +337 -0
- scitex/_mcp_resources/_session.py +149 -0
- scitex/_mcp_tools/__init__.py +4 -0
- scitex/_mcp_tools/audio.py +66 -0
- scitex/_mcp_tools/diagram.py +11 -95
- scitex/_mcp_tools/introspect.py +210 -0
- scitex/_mcp_tools/plt.py +260 -305
- scitex/_mcp_tools/scholar.py +74 -0
- scitex/_mcp_tools/social.py +27 -0
- scitex/_mcp_tools/template.py +24 -0
- scitex/_mcp_tools/writer.py +17 -210
- scitex/ai/_gen_ai/_PARAMS.py +10 -7
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
- scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
- scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
- scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
- scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
- scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
- scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
- scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
- scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
- scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
- scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
- scitex/audio/README.md +40 -36
- scitex/audio/__init__.py +129 -61
- scitex/audio/_branding.py +185 -0
- scitex/audio/_mcp/__init__.py +32 -0
- scitex/audio/_mcp/handlers.py +59 -6
- scitex/audio/_mcp/speak_handlers.py +238 -0
- scitex/audio/_relay.py +225 -0
- scitex/audio/_tts.py +18 -10
- scitex/audio/engines/base.py +17 -10
- scitex/audio/engines/elevenlabs_engine.py +7 -2
- scitex/audio/mcp_server.py +228 -75
- scitex/canvas/README.md +1 -1
- scitex/canvas/editor/_dearpygui/__init__.py +25 -0
- scitex/canvas/editor/_dearpygui/_editor.py +147 -0
- scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
- scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
- scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
- scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
- scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
- scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
- scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
- scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
- scitex/canvas/editor/_dearpygui/_selection.py +295 -0
- scitex/canvas/editor/_dearpygui/_state.py +93 -0
- scitex/canvas/editor/_dearpygui/_utils.py +61 -0
- scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
- scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
- scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
- scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
- scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
- scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
- scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
- scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
- scitex/canvas/editor/flask_editor/_core.py +25 -1684
- scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
- scitex/cli/__init__.py +38 -43
- scitex/cli/audio.py +160 -41
- scitex/cli/capture.py +133 -20
- scitex/cli/introspect.py +488 -0
- scitex/cli/main.py +200 -109
- scitex/cli/mcp.py +60 -34
- scitex/cli/plt.py +414 -0
- scitex/cli/repro.py +15 -8
- scitex/cli/resource.py +15 -8
- scitex/cli/scholar/__init__.py +154 -8
- scitex/cli/scholar/_crossref_scitex.py +296 -0
- scitex/cli/scholar/_fetch.py +25 -3
- scitex/cli/social.py +355 -0
- scitex/cli/stats.py +136 -11
- scitex/cli/template.py +129 -12
- scitex/cli/tex.py +15 -8
- scitex/cli/writer.py +49 -299
- scitex/cloud/__init__.py +41 -2
- scitex/config/README.md +1 -1
- scitex/config/__init__.py +16 -2
- scitex/config/_env_registry.py +256 -0
- scitex/context/__init__.py +22 -0
- scitex/dev/__init__.py +20 -1
- scitex/diagram/__init__.py +42 -19
- scitex/diagram/mcp_server.py +13 -125
- scitex/gen/__init__.py +50 -14
- scitex/gen/_list_packages.py +4 -4
- scitex/introspect/__init__.py +82 -0
- scitex/introspect/_call_graph.py +303 -0
- scitex/introspect/_class_hierarchy.py +163 -0
- scitex/introspect/_core.py +41 -0
- scitex/introspect/_docstring.py +131 -0
- scitex/introspect/_examples.py +113 -0
- scitex/introspect/_imports.py +271 -0
- scitex/{gen/_inspect_module.py → introspect/_list_api.py} +48 -56
- scitex/introspect/_mcp/__init__.py +41 -0
- scitex/introspect/_mcp/handlers.py +233 -0
- scitex/introspect/_members.py +155 -0
- scitex/introspect/_resolve.py +89 -0
- scitex/introspect/_signature.py +131 -0
- scitex/introspect/_source.py +80 -0
- scitex/introspect/_type_hints.py +172 -0
- scitex/io/_save.py +1 -2
- scitex/io/bundle/README.md +1 -1
- scitex/logging/_formatters.py +19 -9
- scitex/mcp_server.py +98 -5
- scitex/os/__init__.py +4 -0
- scitex/{gen → os}/_check_host.py +4 -5
- scitex/plt/__init__.py +245 -550
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
- scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/plt/gallery/README.md +1 -1
- scitex/plt/utils/_hitmap/__init__.py +82 -0
- scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
- scitex/plt/utils/_hitmap/_color_application.py +346 -0
- scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
- scitex/plt/utils/_hitmap/_constants.py +40 -0
- scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
- scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
- scitex/plt/utils/_hitmap/_query.py +113 -0
- scitex/plt/utils/_hitmap.py +46 -1616
- scitex/plt/utils/_metadata/__init__.py +80 -0
- scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
- scitex/plt/utils/_metadata/_artists/_base.py +195 -0
- scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
- scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
- scitex/plt/utils/_metadata/_artists/_images.py +80 -0
- scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
- scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
- scitex/plt/utils/_metadata/_artists/_text.py +106 -0
- scitex/plt/utils/_metadata/_csv.py +416 -0
- scitex/plt/utils/_metadata/_detect.py +225 -0
- scitex/plt/utils/_metadata/_legend.py +127 -0
- scitex/plt/utils/_metadata/_rounding.py +117 -0
- scitex/plt/utils/_metadata/_verification.py +202 -0
- scitex/schema/README.md +1 -1
- scitex/scholar/__init__.py +8 -0
- scitex/scholar/_mcp/crossref_handlers.py +265 -0
- scitex/scholar/core/Scholar.py +63 -1700
- scitex/scholar/core/_mixins/__init__.py +36 -0
- scitex/scholar/core/_mixins/_enrichers.py +270 -0
- scitex/scholar/core/_mixins/_library_handlers.py +100 -0
- scitex/scholar/core/_mixins/_loaders.py +103 -0
- scitex/scholar/core/_mixins/_pdf_download.py +375 -0
- scitex/scholar/core/_mixins/_pipeline.py +312 -0
- scitex/scholar/core/_mixins/_project_handlers.py +125 -0
- scitex/scholar/core/_mixins/_savers.py +69 -0
- scitex/scholar/core/_mixins/_search.py +103 -0
- scitex/scholar/core/_mixins/_services.py +88 -0
- scitex/scholar/core/_mixins/_url_finding.py +105 -0
- scitex/scholar/crossref_scitex.py +367 -0
- scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/scholar/examples/00_run_all.sh +120 -0
- scitex/scholar/jobs/_executors.py +27 -3
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
- scitex/scholar/pdf_download/_cli.py +154 -0
- scitex/scholar/pdf_download/strategies/__init__.py +11 -8
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
- scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
- scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
- scitex/scholar/pipelines/_single_steps.py +71 -36
- scitex/scholar/storage/_LibraryManager.py +97 -1695
- scitex/scholar/storage/_mixins/__init__.py +30 -0
- scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
- scitex/scholar/storage/_mixins/_library_operations.py +218 -0
- scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
- scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
- scitex/scholar/storage/_mixins/_resolution.py +376 -0
- scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
- scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
- scitex/security/README.md +3 -3
- scitex/session/README.md +1 -1
- scitex/session/__init__.py +26 -7
- scitex/session/_decorator.py +1 -1
- scitex/sh/README.md +1 -1
- scitex/sh/__init__.py +7 -4
- scitex/social/__init__.py +155 -0
- scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/stats/_mcp/_handlers/__init__.py +31 -0
- scitex/stats/_mcp/_handlers/_corrections.py +113 -0
- scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
- scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
- scitex/stats/_mcp/_handlers/_format.py +94 -0
- scitex/stats/_mcp/_handlers/_normality.py +110 -0
- scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
- scitex/stats/_mcp/_handlers/_power.py +247 -0
- scitex/stats/_mcp/_handlers/_recommend.py +102 -0
- scitex/stats/_mcp/_handlers/_run_test.py +279 -0
- scitex/stats/_mcp/_handlers/_stars.py +48 -0
- scitex/stats/_mcp/handlers.py +19 -1171
- scitex/stats/auto/_stat_style.py +175 -0
- scitex/stats/auto/_style_definitions.py +411 -0
- scitex/stats/auto/_styles.py +22 -620
- scitex/stats/descriptive/__init__.py +11 -8
- scitex/stats/descriptive/_ci.py +39 -0
- scitex/stats/power/_power.py +15 -4
- scitex/str/__init__.py +2 -1
- scitex/str/_title_case.py +63 -0
- scitex/template/README.md +1 -1
- scitex/template/__init__.py +25 -10
- scitex/template/_code_templates.py +147 -0
- scitex/template/_mcp/handlers.py +81 -0
- scitex/template/_mcp/tool_schemas.py +55 -0
- scitex/template/_templates/__init__.py +51 -0
- scitex/template/_templates/audio.py +233 -0
- scitex/template/_templates/canvas.py +312 -0
- scitex/template/_templates/capture.py +268 -0
- scitex/template/_templates/config.py +43 -0
- scitex/template/_templates/diagram.py +294 -0
- scitex/template/_templates/io.py +107 -0
- scitex/template/_templates/module.py +53 -0
- scitex/template/_templates/plt.py +202 -0
- scitex/template/_templates/scholar.py +267 -0
- scitex/template/_templates/session.py +130 -0
- scitex/template/_templates/session_minimal.py +43 -0
- scitex/template/_templates/session_plot.py +67 -0
- scitex/template/_templates/session_stats.py +77 -0
- scitex/template/_templates/stats.py +323 -0
- scitex/template/_templates/writer.py +296 -0
- scitex/template/clone_writer_directory.py +5 -5
- scitex/ui/_backends/_email.py +10 -2
- scitex/ui/_backends/_webhook.py +5 -1
- scitex/web/_search_pubmed.py +10 -6
- scitex/writer/README.md +1 -1
- scitex/writer/__init__.py +43 -34
- scitex/writer/_mcp/handlers.py +11 -744
- scitex/writer/_mcp/tool_schemas.py +5 -335
- scitex-2.15.3.dist-info/METADATA +667 -0
- {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/RECORD +241 -120
- scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
- scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
- scitex/diagram/_compile.py +0 -312
- scitex/diagram/_diagram.py +0 -355
- scitex/diagram/_mcp/__init__.py +0 -4
- scitex/diagram/_mcp/handlers.py +0 -400
- scitex/diagram/_mcp/tool_schemas.py +0 -157
- scitex/diagram/_presets.py +0 -173
- scitex/diagram/_schema.py +0 -182
- scitex/diagram/_split.py +0 -278
- scitex/gen/_ci.py +0 -12
- scitex/gen/_title_case.py +0 -89
- scitex/plt/_mcp/__init__.py +0 -4
- scitex/plt/_mcp/_handlers_annotation.py +0 -102
- scitex/plt/_mcp/_handlers_figure.py +0 -195
- scitex/plt/_mcp/_handlers_plot.py +0 -252
- scitex/plt/_mcp/_handlers_style.py +0 -219
- scitex/plt/_mcp/handlers.py +0 -74
- scitex/plt/_mcp/tool_schemas.py +0 -497
- scitex/plt/mcp_server.py +0 -231
- scitex/scholar/examples/SUGGESTIONS.md +0 -865
- scitex/scholar/examples/dev.py +0 -38
- scitex-2.14.0.dist-info/METADATA +0 -1238
- /scitex/{gen → context}/_detect_environment.py +0 -0
- /scitex/{gen → context}/_get_notebook_path.py +0 -0
- /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/WHEEL +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/entry_points.txt +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,20 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
#
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
# ----------------------------------------
|
|
6
|
-
from __future__ import annotations
|
|
7
|
-
import os
|
|
8
|
-
|
|
9
|
-
__FILE__ = "./src/scitex/scholar/pdf_download/ScholarPDFDownloader.py"
|
|
10
|
-
__DIR__ = os.path.dirname(__FILE__)
|
|
11
|
-
# ----------------------------------------
|
|
2
|
+
# Timestamp: "2026-01-22 (ywatanabe)"
|
|
3
|
+
# File: src/scitex/scholar/pdf_download/ScholarPDFDownloader.py
|
|
4
|
+
"""PDF downloader with multiple fallback strategies."""
|
|
12
5
|
|
|
13
|
-
import
|
|
6
|
+
from __future__ import annotations
|
|
14
7
|
|
|
15
|
-
__FILE__ = __file__
|
|
16
8
|
import asyncio
|
|
17
|
-
import
|
|
9
|
+
import os
|
|
18
10
|
import traceback
|
|
19
11
|
from pathlib import Path
|
|
20
12
|
from typing import List, Optional, Union
|
|
@@ -22,17 +14,14 @@ from typing import List, Optional, Union
|
|
|
22
14
|
from playwright.async_api import BrowserContext
|
|
23
15
|
|
|
24
16
|
from scitex import logging
|
|
25
|
-
from scitex.browser.debugging import browser_logger
|
|
26
17
|
from scitex.scholar import ScholarConfig
|
|
27
18
|
from scitex.scholar.pdf_download.strategies import (
|
|
28
|
-
DownloadMonitorAndSync,
|
|
29
19
|
FlexibleFilenameGenerator,
|
|
30
|
-
|
|
20
|
+
handle_manual_download_on_page_async,
|
|
31
21
|
try_download_chrome_pdf_viewer_async,
|
|
32
22
|
try_download_direct_async,
|
|
33
|
-
try_download_manual_async,
|
|
34
|
-
try_download_response_body_async,
|
|
35
23
|
try_download_open_access_async,
|
|
24
|
+
try_download_response_body_async,
|
|
36
25
|
)
|
|
37
26
|
|
|
38
27
|
logger = logging.getLogger(__name__)
|
|
@@ -41,31 +30,21 @@ logger = logging.getLogger(__name__)
|
|
|
41
30
|
class ScholarPDFDownloader:
|
|
42
31
|
"""Download PDFs from URLs with multiple fallback strategies.
|
|
43
32
|
|
|
44
|
-
|
|
33
|
+
Strategies tried in order:
|
|
45
34
|
- Chrome PDF Viewer
|
|
46
35
|
- Direct Download (ERR_ABORTED)
|
|
47
36
|
- Response Body Extraction
|
|
48
37
|
- Manual Download Fallback
|
|
49
38
|
|
|
50
|
-
URL resolution (DOI
|
|
51
|
-
|
|
52
|
-
Logging Strategy:
|
|
53
|
-
- Uses `logger` for terminal-only logs (batch operations, coordination)
|
|
54
|
-
- Uses `await browser_logger` for browser automation logs (visual popups)
|
|
55
|
-
- All messages prefixed with self.name for traceability
|
|
39
|
+
URL resolution (DOI -> URL) should be handled by the caller.
|
|
56
40
|
"""
|
|
57
41
|
|
|
58
|
-
def __init__(
|
|
59
|
-
self,
|
|
60
|
-
context: BrowserContext,
|
|
61
|
-
config: ScholarConfig = None,
|
|
62
|
-
):
|
|
42
|
+
def __init__(self, context: BrowserContext, config: ScholarConfig = None):
|
|
63
43
|
self.name = self.__class__.__name__
|
|
64
44
|
self.config = config if config else ScholarConfig()
|
|
65
45
|
self.context = context
|
|
66
46
|
self.output_dir = self.config.get_library_downloads_dir()
|
|
67
47
|
|
|
68
|
-
# Load access preferences from config
|
|
69
48
|
self.prefer_open_access = self.config.resolve(
|
|
70
49
|
"prefer_open_access", default=True, type=bool
|
|
71
50
|
)
|
|
@@ -79,27 +58,14 @@ class ScholarPDFDownloader:
|
|
|
79
58
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
80
59
|
pass
|
|
81
60
|
|
|
82
|
-
# Main entry points
|
|
83
|
-
# ----------------------------------------
|
|
84
|
-
|
|
85
61
|
async def download_from_urls(
|
|
86
62
|
self,
|
|
87
63
|
pdf_urls: List[str],
|
|
88
64
|
output_dir: Union[str, Path] = None,
|
|
89
65
|
max_concurrent: int = 3,
|
|
90
66
|
) -> List[Path]:
|
|
91
|
-
"""Download multiple PDFs with parallel processing.
|
|
92
|
-
|
|
93
|
-
Args:
|
|
94
|
-
pdf_urls: List of PDF URLs to download
|
|
95
|
-
output_dir: Output directory for downloaded PDFs
|
|
96
|
-
max_concurrent: Maximum number of concurrent downloads (default: 3)
|
|
97
|
-
|
|
98
|
-
Returns:
|
|
99
|
-
List of paths to suffcessfully downloaded PDFs
|
|
100
|
-
"""
|
|
67
|
+
"""Download multiple PDFs with parallel processing."""
|
|
101
68
|
output_dir = output_dir or self.output_dir
|
|
102
|
-
|
|
103
69
|
if not pdf_urls:
|
|
104
70
|
return []
|
|
105
71
|
|
|
@@ -108,7 +74,6 @@ class ScholarPDFDownloader:
|
|
|
108
74
|
for ii_pdf, pdf_url in enumerate(pdf_urls)
|
|
109
75
|
]
|
|
110
76
|
|
|
111
|
-
# Use semaphore for controlled parallelization
|
|
112
77
|
semaphore = asyncio.Semaphore(max_concurrent)
|
|
113
78
|
|
|
114
79
|
async def download_with_semaphore(url: str, path: Path, index: int):
|
|
@@ -128,7 +93,6 @@ class ScholarPDFDownloader:
|
|
|
128
93
|
|
|
129
94
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
130
95
|
|
|
131
|
-
# Filter suffcessful downloads
|
|
132
96
|
saved_paths = []
|
|
133
97
|
for result in results:
|
|
134
98
|
if isinstance(result, Exception):
|
|
@@ -136,9 +100,7 @@ class ScholarPDFDownloader:
|
|
|
136
100
|
elif result:
|
|
137
101
|
saved_paths.append(result)
|
|
138
102
|
|
|
139
|
-
logger.info(
|
|
140
|
-
f"{self.name}: Downloaded {len(saved_paths)}/{len(pdf_urls)} PDFs suffcessfully"
|
|
141
|
-
)
|
|
103
|
+
logger.info(f"{self.name}: Downloaded {len(saved_paths)}/{len(pdf_urls)} PDFs")
|
|
142
104
|
return saved_paths
|
|
143
105
|
|
|
144
106
|
async def download_open_access(
|
|
@@ -147,20 +109,7 @@ class ScholarPDFDownloader:
|
|
|
147
109
|
output_path: Union[str, Path],
|
|
148
110
|
metadata: Optional[dict] = None,
|
|
149
111
|
) -> Optional[Path]:
|
|
150
|
-
"""Download PDF from an Open Access URL.
|
|
151
|
-
|
|
152
|
-
This is a simpler path for known OA papers - no browser automation needed.
|
|
153
|
-
Uses direct HTTP download with appropriate handling for different OA sources
|
|
154
|
-
(arXiv, PMC, OpenAlex OA URLs, etc.).
|
|
155
|
-
|
|
156
|
-
Args:
|
|
157
|
-
oa_url: Open Access URL (from paper.metadata.access.oa_url)
|
|
158
|
-
output_path: Path to save the downloaded PDF
|
|
159
|
-
metadata: Optional paper metadata for logging
|
|
160
|
-
|
|
161
|
-
Returns:
|
|
162
|
-
Path to downloaded PDF if successful, None otherwise
|
|
163
|
-
"""
|
|
112
|
+
"""Download PDF from an Open Access URL."""
|
|
164
113
|
if not oa_url:
|
|
165
114
|
logger.debug(f"{self.name}: No OA URL provided")
|
|
166
115
|
return None
|
|
@@ -183,39 +132,19 @@ class ScholarPDFDownloader:
|
|
|
183
132
|
if result:
|
|
184
133
|
logger.info(f"{self.name}: Successfully downloaded OA PDF to {result}")
|
|
185
134
|
else:
|
|
186
|
-
logger.debug(
|
|
187
|
-
f"{self.name}: OA download failed, may need browser-based download"
|
|
188
|
-
)
|
|
135
|
+
logger.debug(f"{self.name}: OA download failed")
|
|
189
136
|
|
|
190
137
|
return result
|
|
191
138
|
|
|
192
139
|
async def download_smart(
|
|
193
|
-
self,
|
|
194
|
-
paper,
|
|
195
|
-
output_path: Union[str, Path],
|
|
140
|
+
self, paper, output_path: Union[str, Path]
|
|
196
141
|
) -> Optional[Path]:
|
|
197
|
-
"""Smart download
|
|
198
|
-
|
|
199
|
-
Priority order:
|
|
200
|
-
1. Try Open Access URL if available and prefer_open_access is True
|
|
201
|
-
2. Try regular PDF URLs if available
|
|
202
|
-
3. Try paywall access if enable_paywall_access is True and OA failed
|
|
203
|
-
|
|
204
|
-
Args:
|
|
205
|
-
paper: Paper object with metadata (from scitex.scholar.core.Paper)
|
|
206
|
-
output_path: Path to save the downloaded PDF
|
|
207
|
-
|
|
208
|
-
Returns:
|
|
209
|
-
Path to downloaded PDF if successful, None otherwise
|
|
210
|
-
"""
|
|
211
|
-
from scitex.scholar.core.Paper import Paper
|
|
212
|
-
|
|
142
|
+
"""Smart download choosing best strategy based on paper metadata."""
|
|
213
143
|
if isinstance(output_path, str):
|
|
214
144
|
output_path = Path(output_path)
|
|
215
145
|
if not str(output_path).endswith(".pdf"):
|
|
216
146
|
output_path = Path(str(output_path) + ".pdf")
|
|
217
147
|
|
|
218
|
-
# Extract metadata
|
|
219
148
|
meta = paper.metadata if hasattr(paper, "metadata") else paper
|
|
220
149
|
access = getattr(meta, "access", None)
|
|
221
150
|
url_meta = getattr(meta, "url", None)
|
|
@@ -233,7 +162,6 @@ class ScholarPDFDownloader:
|
|
|
233
162
|
logger.info(f"{self.name}: Trying Open Access URL first")
|
|
234
163
|
result = await self.download_open_access(oa_url, output_path)
|
|
235
164
|
if result:
|
|
236
|
-
# Update access metadata to record successful OA download
|
|
237
165
|
if access and self.track_paywall_attempts:
|
|
238
166
|
access.paywall_bypass_attempted = False
|
|
239
167
|
return result
|
|
@@ -253,7 +181,6 @@ class ScholarPDFDownloader:
|
|
|
253
181
|
if access and self.track_paywall_attempts:
|
|
254
182
|
access.paywall_bypass_attempted = True
|
|
255
183
|
|
|
256
|
-
# Use DOI-based URL if available
|
|
257
184
|
if doi:
|
|
258
185
|
doi_url = f"https://doi.org/{doi}"
|
|
259
186
|
result = await self.download_from_url(doi_url, output_path, doi=doi)
|
|
@@ -261,9 +188,8 @@ class ScholarPDFDownloader:
|
|
|
261
188
|
if access and self.track_paywall_attempts:
|
|
262
189
|
access.paywall_bypass_success = True
|
|
263
190
|
return result
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
access.paywall_bypass_success = False
|
|
191
|
+
elif access and self.track_paywall_attempts:
|
|
192
|
+
access.paywall_bypass_success = False
|
|
267
193
|
|
|
268
194
|
logger.warning(f"{self.name}: All download strategies exhausted for DOI={doi}")
|
|
269
195
|
return None
|
|
@@ -274,12 +200,7 @@ class ScholarPDFDownloader:
|
|
|
274
200
|
output_path: Union[str, Path],
|
|
275
201
|
doi: Optional[str] = None,
|
|
276
202
|
) -> Optional[Path]:
|
|
277
|
-
"""Main download method with manual override support.
|
|
278
|
-
|
|
279
|
-
Shows manual download button immediately - if clicked, switches to manual mode.
|
|
280
|
-
Otherwise tries automated download strategies.
|
|
281
|
-
"""
|
|
282
|
-
|
|
203
|
+
"""Main download method with manual override support."""
|
|
283
204
|
if not pdf_url:
|
|
284
205
|
logger.warning(f"{self.name}: PDF URL passed but not valid: {pdf_url}")
|
|
285
206
|
return None
|
|
@@ -290,41 +211,26 @@ class ScholarPDFDownloader:
|
|
|
290
211
|
output_path = Path(str(output_path) + ".pdf")
|
|
291
212
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
292
213
|
|
|
293
|
-
# Generate target filename for button display
|
|
294
214
|
target_filename = FlexibleFilenameGenerator.generate_filename(
|
|
295
|
-
doi=doi,
|
|
296
|
-
url=pdf_url,
|
|
297
|
-
content_type="main",
|
|
215
|
+
doi=doi, url=pdf_url, content_type="main"
|
|
298
216
|
)
|
|
299
217
|
|
|
300
|
-
# Create stop event for manual mode
|
|
301
218
|
stop_event = asyncio.Event()
|
|
219
|
+
self.context._scitex_is_manual_mode = False
|
|
220
|
+
self.context._scitex_manual_mode_event = stop_event
|
|
302
221
|
|
|
303
|
-
# Add manual mode flag to context (shared across all strategies)
|
|
304
|
-
self.context._scitex_is_manual_mode = False # Flag strategies can check
|
|
305
|
-
self.context._scitex_manual_mode_event = (
|
|
306
|
-
stop_event # Event for internal monitoring
|
|
307
|
-
)
|
|
308
|
-
|
|
309
|
-
# Inject manual mode button script into ALL pages in this context
|
|
310
|
-
# This ensures button appears on every page, even after redirects
|
|
311
222
|
from scitex.scholar.pdf_download.strategies.manual_download_utils import (
|
|
312
223
|
get_manual_button_init_script,
|
|
313
224
|
)
|
|
314
225
|
|
|
315
226
|
button_script = get_manual_button_init_script(target_filename)
|
|
316
227
|
await self.context.add_init_script(button_script)
|
|
317
|
-
logger.info(
|
|
318
|
-
f"{self.name}: Manual mode button injected into browser context (appears on ALL pages)"
|
|
319
|
-
)
|
|
228
|
+
logger.info(f"{self.name}: Manual mode button injected into browser context")
|
|
320
229
|
|
|
321
|
-
# Create manual mode monitoring (will be used if user presses 'M')
|
|
322
230
|
button_task = None
|
|
323
231
|
pdf_page = None
|
|
324
232
|
|
|
325
|
-
# Define download strategies with their names
|
|
326
233
|
async def chrome_pdf_wrapper(url, path):
|
|
327
|
-
# Chrome PDF strategy creates its own page
|
|
328
234
|
return await try_download_chrome_pdf_viewer_async(
|
|
329
235
|
self.context, url, path, self.name
|
|
330
236
|
)
|
|
@@ -338,8 +244,6 @@ class ScholarPDFDownloader:
|
|
|
338
244
|
)
|
|
339
245
|
|
|
340
246
|
async def manual_fallback_wrapper(url, path):
|
|
341
|
-
# Don't run manual download in the loop - it's handled separately after
|
|
342
|
-
# if stop_event is set
|
|
343
247
|
return None
|
|
344
248
|
|
|
345
249
|
try_download_methods = [
|
|
@@ -350,81 +254,61 @@ class ScholarPDFDownloader:
|
|
|
350
254
|
]
|
|
351
255
|
|
|
352
256
|
for method_name, method_func in try_download_methods:
|
|
353
|
-
# Check if user activated manual mode - STOP ALL AUTOMATION IMMEDIATELY
|
|
354
257
|
if stop_event.is_set():
|
|
355
|
-
logger.info(
|
|
356
|
-
f"{self.name}: User activated manual mode - stopping all automation"
|
|
357
|
-
)
|
|
258
|
+
logger.info(f"{self.name}: Manual mode - stopping automation")
|
|
358
259
|
break
|
|
359
260
|
|
|
360
261
|
logger.info(f"{self.name}: Trying method: {method_name}")
|
|
361
262
|
|
|
362
|
-
# Pass stop_event to strategies so they can check it periodically
|
|
363
263
|
try:
|
|
364
|
-
# Check before starting
|
|
365
264
|
if stop_event.is_set():
|
|
366
|
-
logger.info(
|
|
367
|
-
f"{self.name}: Manual mode activated, skipping {method_name}"
|
|
368
|
-
)
|
|
265
|
+
logger.info(f"{self.name}: Manual mode, skipping {method_name}")
|
|
369
266
|
break
|
|
370
267
|
|
|
371
|
-
# Run the method - it should check stop_event periodically
|
|
372
268
|
is_downloaded = await method_func(pdf_url, output_path)
|
|
373
269
|
|
|
374
|
-
# Check after completing
|
|
375
270
|
if stop_event.is_set():
|
|
376
|
-
logger.info(
|
|
377
|
-
f"{self.name}: Manual mode activated during {method_name}"
|
|
378
|
-
)
|
|
271
|
+
logger.info(f"{self.name}: Manual mode during {method_name}")
|
|
379
272
|
break
|
|
380
273
|
|
|
381
274
|
if is_downloaded:
|
|
382
|
-
# Clean up
|
|
383
275
|
if button_task:
|
|
384
276
|
button_task.cancel()
|
|
385
277
|
if pdf_page:
|
|
386
278
|
await pdf_page.close()
|
|
387
|
-
logger.info(
|
|
388
|
-
|
|
389
|
-
)
|
|
390
|
-
return is_downloaded # Return the actual path from the strategy
|
|
279
|
+
logger.info(f"{self.name}: Downloaded via {method_name}")
|
|
280
|
+
return is_downloaded
|
|
391
281
|
else:
|
|
392
|
-
logger.debug(
|
|
393
|
-
f"{self.name}: {method_name} returned None (failed or not applicable)"
|
|
394
|
-
)
|
|
282
|
+
logger.debug(f"{self.name}: {method_name} returned None")
|
|
395
283
|
except Exception as e:
|
|
396
284
|
logger.warning(f"{self.name}: {method_name} raised exception: {e}")
|
|
397
285
|
logger.debug(f"{self.name}: Traceback: {traceback.format_exc()}")
|
|
398
286
|
|
|
399
|
-
#
|
|
287
|
+
# Handle manual download if user chose it
|
|
400
288
|
if stop_event.is_set():
|
|
401
|
-
# Set context flag so all strategies know we're in manual mode
|
|
402
289
|
self.context._scitex_is_manual_mode = True
|
|
403
|
-
|
|
404
|
-
logger.info(
|
|
405
|
-
f"{self.name}: User chose manual download - starting monitoring"
|
|
406
|
-
)
|
|
407
|
-
# Cancel button task
|
|
290
|
+
logger.info(f"{self.name}: User chose manual download - starting")
|
|
408
291
|
if button_task:
|
|
409
292
|
button_task.cancel()
|
|
410
293
|
|
|
411
|
-
# Open page for manual download if not already open
|
|
412
294
|
if not pdf_page:
|
|
413
295
|
pdf_page = await self.context.new_page()
|
|
414
296
|
await pdf_page.goto(
|
|
415
297
|
pdf_url, timeout=30000, wait_until="domcontentloaded"
|
|
416
298
|
)
|
|
417
299
|
|
|
418
|
-
result = await
|
|
300
|
+
result = await handle_manual_download_on_page_async(
|
|
419
301
|
pdf_page,
|
|
420
302
|
pdf_url,
|
|
421
303
|
output_path,
|
|
304
|
+
func_name=self.name,
|
|
305
|
+
config=self.config,
|
|
422
306
|
doi=doi,
|
|
423
307
|
)
|
|
424
308
|
await pdf_page.close()
|
|
425
309
|
return result
|
|
426
310
|
|
|
427
|
-
# All methods failed
|
|
311
|
+
# All methods failed
|
|
428
312
|
if button_task:
|
|
429
313
|
button_task.cancel()
|
|
430
314
|
if pdf_page:
|
|
@@ -432,273 +316,11 @@ class ScholarPDFDownloader:
|
|
|
432
316
|
logger.fail(f"{self.name}: All download methods failed for {pdf_url}")
|
|
433
317
|
return None
|
|
434
318
|
|
|
435
|
-
# Helper functions
|
|
436
|
-
# ----------------------------------------
|
|
437
|
-
|
|
438
|
-
async def _handle_manual_download_async(
|
|
439
|
-
self, page, pdf_url: str, output_path: Path, doi: Optional[str] = None
|
|
440
|
-
) -> Optional[Path]:
|
|
441
|
-
"""
|
|
442
|
-
Handle manual download workflow when automation is stopped by user.
|
|
443
|
-
|
|
444
|
-
Args:
|
|
445
|
-
page: Playwright page where stop button was clicked
|
|
446
|
-
pdf_url: URL of the PDF
|
|
447
|
-
output_path: Target output path
|
|
448
|
-
doi: Optional DOI for filename generation
|
|
449
|
-
|
|
450
|
-
Returns:
|
|
451
|
-
Path to downloaded file, or None if failed
|
|
452
|
-
"""
|
|
453
|
-
|
|
454
|
-
# Get directories from config
|
|
455
|
-
# IMPORTANT: Manual download should ONLY save to downloads dir
|
|
456
|
-
# MASTER organization (8-digit IDs) is handled by storage module
|
|
457
|
-
temp_downloads_dir = self.config.get_library_downloads_dir()
|
|
458
|
-
final_pdfs_dir = self.config.get_library_downloads_dir() # NOT MASTER!
|
|
459
|
-
|
|
460
|
-
# Extract DOI from URL if not provided
|
|
461
|
-
if not doi and "doi.org/" in pdf_url:
|
|
462
|
-
doi = pdf_url.split("doi.org/")[-1].split("?")[0].split("#")[0]
|
|
463
|
-
|
|
464
|
-
await browser_logger.info(
|
|
465
|
-
page,
|
|
466
|
-
f"{self.name}: Manual download mode activated",
|
|
467
|
-
)
|
|
468
|
-
|
|
469
|
-
# Page is already navigated to PDF URL (done in download_from_url)
|
|
470
|
-
# Just show instructions
|
|
471
|
-
await browser_logger.info(
|
|
472
|
-
page,
|
|
473
|
-
f"{self.name}: Please download the PDF manually from this page",
|
|
474
|
-
)
|
|
475
|
-
|
|
476
|
-
# Run complete manual download workflow (without showing button again)
|
|
477
|
-
# The button was already shown and clicked to trigger this
|
|
478
|
-
monitor = DownloadMonitorAndSync(temp_downloads_dir, final_pdfs_dir)
|
|
479
|
-
|
|
480
|
-
# Create logger function for progress reporting (must be sync, not async)
|
|
481
|
-
def log_progress(msg: str):
|
|
482
|
-
logger.info(f"{self.name}: {msg}")
|
|
483
|
-
|
|
484
|
-
# Monitor for new download with progress reporting (2 minutes)
|
|
485
|
-
# Long timeouts cause process accumulation - keep it short
|
|
486
|
-
temp_file = await monitor.monitor_for_new_download_async(
|
|
487
|
-
timeout_sec=120, # 2 minutes to download
|
|
488
|
-
logger_func=log_progress,
|
|
489
|
-
)
|
|
490
|
-
|
|
491
|
-
if not temp_file:
|
|
492
|
-
await browser_logger.error(
|
|
493
|
-
page,
|
|
494
|
-
f"{self.name}: No new PDF detected in downloads directory",
|
|
495
|
-
)
|
|
496
|
-
return None
|
|
497
|
-
|
|
498
|
-
await browser_logger.info(
|
|
499
|
-
page,
|
|
500
|
-
f"{self.name}: Detected PDF: {temp_file.name} ({temp_file.stat().st_size / 1e6:.1f} MB)",
|
|
501
|
-
)
|
|
502
|
-
|
|
503
|
-
# Keep UUID filename as-is in downloads directory
|
|
504
|
-
# Orchestration layer will handle metadata extraction and MASTER organization
|
|
505
|
-
|
|
506
|
-
# Save minimal metadata header (DOI only - no PDF parsing)
|
|
507
|
-
if doi:
|
|
508
|
-
import json
|
|
509
|
-
|
|
510
|
-
metadata_file = temp_file.parent / f"{temp_file.name}.meta.json"
|
|
511
|
-
metadata = {
|
|
512
|
-
"doi": doi,
|
|
513
|
-
"pdf_url": pdf_url,
|
|
514
|
-
"pdf_file": temp_file.name,
|
|
515
|
-
}
|
|
516
|
-
with open(metadata_file, "w") as f:
|
|
517
|
-
json.dump(metadata, f, indent=2)
|
|
518
|
-
|
|
519
|
-
await browser_logger.info(
|
|
520
|
-
page,
|
|
521
|
-
f"{self.name}: Manual download complete - saved in downloads/",
|
|
522
|
-
)
|
|
523
|
-
|
|
524
|
-
logger.info(f"{self.name}: PDF: {temp_file}")
|
|
525
|
-
if doi:
|
|
526
|
-
logger.info(
|
|
527
|
-
f"{self.name}: DOI: {doi} (saved in {temp_file.name}.meta.json)"
|
|
528
|
-
)
|
|
529
|
-
|
|
530
|
-
# Return the UUID file path (in downloads directory)
|
|
531
|
-
return temp_file
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
async def main_async(args):
    """Resolve PDF URLs for ``args.doi`` and download them.

    Example usage showing decoupled URL resolution and downloading: an
    authenticated browser context is prepared first, PDF URLs are then
    resolved with ``ScholarURLFinder``, and finally the URLs are handed to
    ``ScholarPDFDownloader``.

    Parameters
    ----------
    args
        Namespace providing ``doi``, ``output`` and ``browser_mode``.

    Returns
    -------
    None
        An error is logged and the coroutine returns early when no usable
        PDF URL is found.
    """
    from scitex.scholar import (
        ScholarAuthManager,
        ScholarBrowserManager,
        ScholarURLFinder,
    )
    from scitex.scholar.auth import AuthenticationGateway

    # ---------------------------------------
    # Context Preparation
    # ---------------------------------------
    # Authenticated Browser and Context
    auth_manager = ScholarAuthManager()
    browser_manager = ScholarBrowserManager(
        chrome_profile_name="system",
        browser_mode=args.browser_mode,
        auth_manager=auth_manager,
        use_zenrows_proxy=False,
    )
    # Only the context is used below; the browser handle is kept under a
    # throwaway name so the reference stays alive for the whole run.
    (
        _browser,
        context,
    ) = await browser_manager.get_authenticated_browser_and_context_async()

    # Authentication Gateway
    auth_gateway = AuthenticationGateway(
        auth_manager=auth_manager,
        browser_manager=browser_manager,
    )
    url_context = await auth_gateway.prepare_context_async(
        doi=args.doi, context=context
    )

    # ---------------------------------------
    # Step 1: URL Resolution (separate from downloading)
    # ---------------------------------------
    url_finder = ScholarURLFinder(context)

    # Use the resolved URL from auth_gateway to avoid duplicate OpenURL resolution
    resolved_url = url_context.url if url_context else None
    if resolved_url:
        logger.info(f"{__name__}: Using resolved URL from auth_gateway: {resolved_url}")
        urls = await url_finder.find_pdf_urls(resolved_url)
    else:
        logger.info(f"{__name__}: No resolved URL, using DOI: {args.doi}")
        urls = await url_finder.find_pdf_urls(args.doi)  # Will resolve DOI internally

    # Extract URL strings from a list of dicts and/or plain strings.
    # Entries without a usable URL (e.g. a dict lacking the "url" key) are
    # dropped so that None is never counted or handed to the downloader.
    pdf_urls = []
    for entry in urls or []:
        candidate = entry.get("url") if isinstance(entry, dict) else entry
        if isinstance(candidate, str) and candidate:
            pdf_urls.append(candidate)

    if not pdf_urls:
        logger.error(f"No PDF URLs found for DOI: {args.doi}")
        return

    logger.info(f"Found {len(pdf_urls)} PDF URL(s) for DOI: {args.doi}")

    # ---------------------------------------
    # Step 2: PDF Download (URL-only, decoupled from DOI resolution)
    # ---------------------------------------
    pdf_downloader = ScholarPDFDownloader(context)

    if len(pdf_urls) == 1:
        # Single URL - direct download
        await pdf_downloader.download_from_url(pdf_urls[0], args.output)
    else:
        # Multiple URLs - batch download with parallelization
        output_dir = Path(args.output).parent
        await pdf_downloader.download_from_urls(
            pdf_urls,
            output_dir=output_dir,
            max_concurrent=3,
        )
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
def main(args):
    """Drive ``main_async(args)`` to completion on a fresh event loop.

    Returns
    -------
    int
        Always ``0``; it is used as the process exit status by the caller.
    """
    from asyncio import run as run_until_done

    run_until_done(main_async(args))
    return 0
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse command line arguments.

    Parameters
    ----------
    argv
        Argument strings to parse. ``None`` (the default) makes argparse
        read ``sys.argv[1:]``, which is the behaviour of a plain
        ``parse_args()`` call; passing a list allows programmatic use.

    Returns
    -------
    argparse.Namespace
        Namespace with ``doi``, ``output`` and ``browser_mode``.
    """
    # Single source of truth for the default path shown in the help text.
    default_output = "~/.scitex/scholar/library/downloads/downloaded_paper.pdf"

    parser = argparse.ArgumentParser(
        description="Download a PDF using DOI with authentication support"
    )
    parser.add_argument(
        "--doi",
        type=str,
        required=True,
        help="DOI of the paper (e.g., 10.1088/1741-2552/aaf92e)",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=default_output,
        help=f"Output path for the PDF (default: {default_output})",
    )
    parser.add_argument(
        "--browser-mode",
        type=str,
        choices=["stealth", "interactive"],
        default="stealth",
        help="Browser mode (default: stealth)",
    )

    return parser.parse_args(argv)
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
def run_main() -> None:
    """Start a scitex session, execute ``main``, then close the session.

    The values returned by ``stx.session.start`` are published as the module
    globals ``CONFIG``, ``plt``, ``CC`` and ``rng``, and the two returned
    streams are assigned to ``sys.stdout`` / ``sys.stderr``. The status
    returned by ``main`` is forwarded to ``stx.session.close``.
    """
    global CONFIG, CC, sys, plt, rng

    import sys

    import matplotlib.pyplot as plt

    import scitex as stx

    cli_args = parse_args()

    session_objects = stx.session.start(
        sys,
        plt,
        args=cli_args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = session_objects

    status = main(cli_args)

    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=status,
    )
|
|
682
|
-
|
|
683
319
|
|
|
320
|
+
# CLI entry point moved to _cli.py
|
|
684
321
|
if __name__ == "__main__":
|
|
685
|
-
run_main
|
|
686
|
-
|
|
687
|
-
"""
|
|
688
|
-
python -m scitex.scholar.download.ScholarPDFDownloader \
|
|
689
|
-
--browser-mode interactive \
|
|
690
|
-
--doi "10.1016/j.clinph.2024.09.017"
|
|
691
|
-
|
|
692
|
-
python -m scitex.scholar.download.ScholarPDFDownloader \
|
|
693
|
-
--browser-mode interactive \
|
|
694
|
-
--doi "10.1212/wnl.0000000000200348"
|
|
322
|
+
from scitex.scholar.pdf_download._cli import run_main
|
|
695
323
|
|
|
696
|
-
|
|
697
|
-
# This seems to call URL resolution on OpenURL twice
|
|
698
|
-
|
|
699
|
-
--doi "10.3389/fnins.2024.1417748"
|
|
700
|
-
--doi "10.1016/j.clinph.2024.09.017"
|
|
701
|
-
|
|
702
|
-
"""
|
|
324
|
+
run_main()
|
|
703
325
|
|
|
704
326
|
# EOF
|