scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-13 07:54:07 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/ScholarPDFDownloader.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/pdf_download/ScholarPDFDownloader.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
|
|
16
|
+
__FILE__ = __file__
|
|
17
|
+
import asyncio
|
|
18
|
+
import hashlib
|
|
19
|
+
import traceback
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import List, Optional, Union
|
|
22
|
+
|
|
23
|
+
from playwright.async_api import BrowserContext
|
|
24
|
+
|
|
25
|
+
from scitex import logging
|
|
26
|
+
from scitex.browser.debugging import browser_logger
|
|
27
|
+
from scitex.scholar import ScholarConfig
|
|
28
|
+
from scitex.scholar.pdf_download.strategies import (
|
|
29
|
+
DownloadMonitorAndSync,
|
|
30
|
+
FlexibleFilenameGenerator,
|
|
31
|
+
show_stop_automation_button_async,
|
|
32
|
+
try_download_chrome_pdf_viewer_async,
|
|
33
|
+
try_download_direct_async,
|
|
34
|
+
try_download_manual_async,
|
|
35
|
+
try_download_response_body_async,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ScholarPDFDownloader:
    """Download PDFs from URLs with multiple fallback strategies.

    This class focuses solely on downloading PDFs from URLs. The automated
    strategies are tried in this order:
    - Chrome PDF Viewer
    - Direct Download (ERR_ABORTED)
    - Response Body Extraction

    If the per-download stop event is set while the strategies run, automation
    stops and the manual download workflow takes over.

    URL resolution (DOI → URL) should be handled by the caller.

    Logging Strategy:
    - Uses `logger` for terminal-only logs (batch operations, coordination)
    - Uses `await browser_logger` for browser automation logs (visual popups)
    - All messages prefixed with self.name for traceability
    """

    def __init__(
        self,
        context: BrowserContext,
        config: ScholarConfig = None,
    ):
        """Store the browser context and resolve the default output directory.

        Args:
            context: Browser context used by every download strategy.
            config: Scholar configuration; a default ``ScholarConfig()`` is
                created when omitted.
        """
        self.name = self.__class__.__name__
        self.config = config if config else ScholarConfig()
        self.context = context
        self.output_dir = self.config.get_library_downloads_dir()

    async def __aenter__(self):
        """Return the downloader itself so ``async with`` can be used."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Nothing to release; exceptions are never suppressed."""
        return None

    # Main entry points
    # ----------------------------------------

    @staticmethod
    def _build_output_paths(
        output_dir: Union[str, Path], pdf_urls: List[str]
    ) -> List[Path]:
        """Return one ``<index>_<url basename>`` path per URL inside output_dir.

        ``output_dir`` may be a string; it is converted to ``Path`` and a
        leading ``~`` is expanded.
        """
        base_dir = Path(output_dir).expanduser()
        return [
            base_dir / f"{ii_pdf:03d}_{os.path.basename(pdf_url)}"
            for ii_pdf, pdf_url in enumerate(pdf_urls)
        ]

    @staticmethod
    def _extract_doi_from_url(pdf_url: str) -> Optional[str]:
        """Return the text after ``doi.org/`` (query/fragment removed), or None."""
        if "doi.org/" not in pdf_url:
            return None
        return pdf_url.split("doi.org/")[-1].split("?")[0].split("#")[0]

    async def download_from_urls(
        self,
        pdf_urls: List[str],
        output_dir: Union[str, Path] = None,
        max_concurrent: int = 3,
    ) -> List[Path]:
        """Download multiple PDFs with parallel processing.

        Args:
            pdf_urls: List of PDF URLs to download
            output_dir: Output directory for downloaded PDFs (str or Path);
                defaults to ``self.output_dir``
            max_concurrent: Maximum number of concurrent downloads (default: 3);
                values below 1 are treated as 1

        Returns:
            List of paths to successfully downloaded PDFs
        """
        if not pdf_urls:
            return []

        output_paths = self._build_output_paths(
            output_dir or self.output_dir, pdf_urls
        )
        total = len(pdf_urls)

        # Use semaphore for controlled parallelization.
        # Semaphore(0) would block forever, so never go below one slot.
        semaphore = asyncio.Semaphore(max(1, max_concurrent))

        # NOTE(review): download_from_url stores its stop event on the shared
        # browser context, so concurrent downloads overwrite each other's
        # manual-mode state - confirm this is acceptable for batch runs.
        async def download_with_semaphore(url: str, path: Path, index: int):
            async with semaphore:
                logger.info(
                    f"{self.name}: Downloading PDF {index}/{total}: {url}"
                )
                result = await self.download_from_url(url, path)
                if result:
                    logger.info(f"{self.name}: Downloaded to {result}")
                return result

        tasks = [
            download_with_semaphore(url, path, index)
            for index, (url, path) in enumerate(
                zip(pdf_urls, output_paths), start=1
            )
        ]

        # return_exceptions=True keeps one failing download from cancelling
        # the rest of the batch.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Filter successful downloads
        saved_paths = []
        for result in results:
            if isinstance(result, Exception):
                logger.warning(f"{self.name}: Download error: {result}")
            elif result:
                saved_paths.append(result)

        logger.info(
            f"{self.name}: Downloaded {len(saved_paths)}/{total} PDFs successfully"
        )
        return saved_paths

    async def download_from_url(
        self,
        pdf_url: str,
        output_path: Union[str, Path],
        doi: Optional[str] = None,
    ) -> Optional[Path]:
        """Main download method with manual override support.

        Injects the manual-mode button script into the browser context, then
        tries the automated strategies in order. If the stop event is set
        before or during a strategy, automation stops and the manual download
        workflow runs instead.

        Args:
            pdf_url: URL of the PDF to download
            output_path: Target path (str or Path); ``.pdf`` is appended when
                the path does not already end with it
            doi: Optional DOI used for filename generation and metadata

        Returns:
            Whatever the successful strategy (or the manual workflow) returned,
            or None when the URL is empty or every method failed.
        """
        if not pdf_url:
            logger.warning(
                f"{self.name}: PDF URL passed but not valid: {pdf_url}"
            )
            return None

        output_path = Path(output_path).expanduser()
        if not str(output_path).endswith(".pdf"):
            output_path = Path(str(output_path) + ".pdf")
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Generate target filename for button display
        target_filename = FlexibleFilenameGenerator.generate_filename(
            doi=doi,
            url=pdf_url,
            content_type="main",
        )

        # Stop event for manual mode, shared with the strategies through the
        # context. Presumably the injected button sets it - verify in
        # manual_download_utils.
        stop_event = asyncio.Event()
        self.context._scitex_is_manual_mode = False  # Flag strategies can check
        self.context._scitex_manual_mode_event = stop_event

        # Inject manual mode button script into ALL pages in this context
        # This ensures button appears on every page, even after redirects
        from scitex.scholar.pdf_download.strategies.manual_download_utils import (
            get_manual_button_init_script,
        )

        # NOTE(review): every call adds another init script to the same
        # context; confirm repeated injection is harmless for long sessions.
        button_script = get_manual_button_init_script(target_filename)
        await self.context.add_init_script(button_script)
        logger.info(
            f"{self.name}: Manual mode button injected into browser context (appears on ALL pages)"
        )

        # Automated strategies, in fallback order. Each is called as
        # strategy(context, url, path, name).
        strategies = [
            ("Chrome PDF", try_download_chrome_pdf_viewer_async),
            ("Direct Download", try_download_direct_async),
            ("From Response Body", try_download_response_body_async),
        ]

        for method_name, strategy in strategies:
            # Check if user activated manual mode - STOP ALL AUTOMATION IMMEDIATELY
            if stop_event.is_set():
                logger.info(
                    f"{self.name}: User activated manual mode - stopping all automation"
                )
                break

            logger.info(f"{self.name}: Trying method: {method_name}")

            try:
                downloaded = await strategy(
                    self.context, pdf_url, output_path, self.name
                )
            except Exception as e:
                # Best effort: a failing strategy must not prevent the next one.
                logger.warning(
                    f"{self.name}: {method_name} raised exception: {e}"
                )
                logger.debug(
                    f"{self.name}: Traceback: {traceback.format_exc()}"
                )
                continue

            # Manual mode wins even if the strategy reported a result.
            if stop_event.is_set():
                logger.info(
                    f"{self.name}: Manual mode activated during {method_name}"
                )
                break

            if downloaded:
                logger.info(
                    f"{self.name}: Successfully downloaded via {method_name}"
                )
                return downloaded  # Return the actual path from the strategy

            logger.debug(
                f"{self.name}: {method_name} returned None (failed or not applicable)"
            )

        if stop_event.is_set():
            # Set context flag so all strategies know we're in manual mode
            self.context._scitex_is_manual_mode = True

            logger.info(
                f"{self.name}: User chose manual download - starting monitoring"
            )

            # Open a page for the manual download and always close it, even
            # when navigation or monitoring raises.
            pdf_page = await self.context.new_page()
            try:
                await pdf_page.goto(
                    pdf_url, timeout=30000, wait_until="domcontentloaded"
                )
                return await self._handle_manual_download_async(
                    pdf_page,
                    pdf_url,
                    output_path,
                    doi=doi,
                )
            finally:
                await pdf_page.close()

        logger.fail(f"{self.name}: All download methods failed for {pdf_url}")
        return None

    # Helper functions
    # ----------------------------------------

    async def _handle_manual_download_async(
        self, page, pdf_url: str, output_path: Path, doi: Optional[str] = None
    ) -> Optional[Path]:
        """
        Handle manual download workflow when automation is stopped by user.

        Waits up to two minutes for a new file reported by
        ``DownloadMonitorAndSync`` and, when a DOI is known, writes a
        ``<file name>.meta.json`` file next to it.

        Args:
            page: Playwright page already navigated to the PDF URL
            pdf_url: URL of the PDF
            output_path: Target output path (not used here; the detected file
                is left where the monitor found it)
            doi: Optional DOI; derived from a ``doi.org/`` URL when omitted

        Returns:
            Path to downloaded file, or None if failed
        """
        # IMPORTANT: Manual download should ONLY save to downloads dir
        # MASTER organization (8-digit IDs) is handled by storage module
        temp_downloads_dir = self.config.get_library_downloads_dir()
        final_pdfs_dir = self.config.get_library_downloads_dir()  # NOT MASTER!

        # Extract DOI from URL if not provided
        if not doi:
            doi = self._extract_doi_from_url(pdf_url)

        await browser_logger.info(
            page,
            f"{self.name}: Manual download mode activated",
        )

        # Page is already navigated to PDF URL (done in download_from_url)
        # Just show instructions
        await browser_logger.info(
            page,
            f"{self.name}: Please download the PDF manually from this page",
        )

        # Run complete manual download workflow (without showing button again)
        # The button was already shown and clicked to trigger this
        monitor = DownloadMonitorAndSync(temp_downloads_dir, final_pdfs_dir)

        # Create logger function for progress reporting (must be sync, not async)
        def log_progress(msg: str):
            logger.info(f"{self.name}: {msg}")

        # Monitor for new download with progress reporting (2 minutes)
        # Long timeouts cause process accumulation - keep it short
        temp_file = await monitor.monitor_for_new_download_async(
            timeout_sec=120,  # 2 minutes to download
            logger_func=log_progress,
        )

        if not temp_file:
            await browser_logger.error(
                page,
                f"{self.name}: No new PDF detected in downloads directory",
            )
            return None

        await browser_logger.info(
            page,
            f"{self.name}: Detected PDF: {temp_file.name} ({temp_file.stat().st_size / 1e6:.1f} MB)",
        )

        # Keep UUID filename as-is in downloads directory
        # Orchestration layer will handle metadata extraction and MASTER organization

        # Save minimal metadata header (DOI only - no PDF parsing)
        if doi:
            import json

            metadata_file = temp_file.parent / f"{temp_file.name}.meta.json"
            metadata = {
                "doi": doi,
                "pdf_url": pdf_url,
                "pdf_file": temp_file.name,
            }
            with open(metadata_file, "w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2)

        await browser_logger.info(
            page,
            f"{self.name}: Manual download complete - saved in downloads/",
        )

        logger.info(f"{self.name}: PDF: {temp_file}")
        if doi:
            logger.info(
                f"{self.name}: DOI: {doi} (saved in {temp_file.name}.meta.json)"
            )

        # Return the UUID file path (in downloads directory)
        return temp_file
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
async def main_async(args):
    """Example usage showing decoupled URL resolution and downloading.

    Args:
        args: Parsed CLI namespace providing ``doi``, ``output`` and
            ``browser_mode``.

    Returns:
        None. Results are reported through the logger.
    """
    from scitex.scholar import (
        ScholarAuthManager,
        ScholarBrowserManager,
        ScholarURLFinder,
    )
    from scitex.scholar.auth import AuthenticationGateway

    # ---------------------------------------
    # Context Preparation
    # ---------------------------------------
    # Authenticated Browser and Context
    auth_manager = ScholarAuthManager()
    browser_manager = ScholarBrowserManager(
        chrome_profile_name="system",
        browser_mode=args.browser_mode,
        auth_manager=auth_manager,
        use_zenrows_proxy=False,
    )
    _browser, context = (
        await browser_manager.get_authenticated_browser_and_context_async()
    )

    # Authentication Gateway
    auth_gateway = AuthenticationGateway(
        auth_manager=auth_manager,
        browser_manager=browser_manager,
    )
    url_context = await auth_gateway.prepare_context_async(
        doi=args.doi, context=context
    )

    # ---------------------------------------
    # Step 1: URL Resolution (separate from downloading)
    # ---------------------------------------
    url_finder = ScholarURLFinder(context)

    # Use the resolved URL from auth_gateway to avoid duplicate OpenURL resolution
    resolved_url = url_context.url if url_context else None
    if resolved_url:
        logger.info(
            f"{__name__}: Using resolved URL from auth_gateway: {resolved_url}"
        )
        urls = await url_finder.find_pdf_urls(resolved_url)
    else:
        logger.info(f"{__name__}: No resolved URL, using DOI: {args.doi}")
        # Will resolve DOI internally
        urls = await url_finder.find_pdf_urls(args.doi)

    # Extract URL strings from a list of dicts and/or plain strings,
    # skipping entries without a usable URL.
    pdf_urls = []
    for entry in urls or []:
        if isinstance(entry, dict):
            candidate = entry.get("url")
        elif isinstance(entry, str):
            candidate = entry
        else:
            candidate = None
        if candidate:
            pdf_urls.append(candidate)

    if not pdf_urls:
        logger.error(f"No PDF URLs found for DOI: {args.doi}")
        return

    logger.info(f"Found {len(pdf_urls)} PDF URL(s) for DOI: {args.doi}")

    # ---------------------------------------
    # Step 2: PDF Download (URL-only, decoupled from DOI resolution)
    # ---------------------------------------
    pdf_downloader = ScholarPDFDownloader(context)

    # The default --output starts with "~"; expand it so no literal "~"
    # directory is created under the current working directory.
    output_path = Path(args.output).expanduser()

    if len(pdf_urls) == 1:
        # Single URL - direct download
        await pdf_downloader.download_from_url(pdf_urls[0], output_path)
    else:
        # Multiple URLs - batch download with parallelization
        await pdf_downloader.download_from_urls(
            pdf_urls,
            output_dir=output_path.parent,
            max_concurrent=3,
        )
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def main(args):
    """Run ``main_async`` to completion and return exit status 0."""
    coroutine = main_async(args)
    asyncio.run(coroutine)
    return 0
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Argument list to parse. When None (the default), argparse
            reads the process command line, as before.

    Returns:
        Namespace with ``doi``, ``output`` and ``browser_mode``.
    """
    parser = argparse.ArgumentParser(
        description="Download a PDF using DOI with authentication support"
    )
    parser.add_argument(
        "--doi",
        type=str,
        required=True,
        help="DOI of the paper (e.g., 10.1088/1741-2552/aaf92e)",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="~/.scitex/scholar/library/downloads/downloaded_paper.pdf",
        help="Output path for the PDF (default: ~/.scitex/scholar/library/downloads/downloaded_paper.pdf)",
    )
    parser.add_argument(
        "--browser-mode",
        type=str,
        choices=["stealth", "interactive"],
        default="stealth",
        help="Browser mode (default: stealth)",
    )

    return parser.parse_args(argv)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def run_main() -> None:
    """Start a scitex session, run ``main`` with the CLI args, then close it."""
    # The session objects are published as module globals.
    global CONFIG, CC, sys, plt, rng

    import sys

    import matplotlib.pyplot as plt

    import scitex as stx

    cli_args = parse_args()

    # session.start returns replacements for stdout/stderr and plt,
    # so they are rebound in place here.
    session = stx.session.start(
        sys,
        plt,
        args=cli_args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = session

    status = main(cli_args)

    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=status,
    )
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
if __name__ == "__main__":
|
|
560
|
+
run_main()
|
|
561
|
+
|
|
562
|
+
"""
|
|
563
|
+
python -m scitex.scholar.pdf_download.ScholarPDFDownloader \
|
|
564
|
+
--browser-mode interactive \
|
|
565
|
+
--doi "10.1016/j.clinph.2024.09.017"
|
|
566
|
+
|
|
567
|
+
python -m scitex.scholar.pdf_download.ScholarPDFDownloader \
|
|
568
|
+
--browser-mode interactive \
|
|
569
|
+
--doi "10.1212/wnl.0000000000200348"
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
# This seems to call URL resolution on OpenURL twice
|
|
573
|
+
|
|
574
|
+
--doi "10.3389/fnins.2024.1417748"
|
|
575
|
+
--doi "10.1016/j.clinph.2024.09.017"
|
|
576
|
+
|
|
577
|
+
"""
|
|
578
|
+
|
|
579
|
+
# EOF
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""PDF Download Strategies
|
|
4
|
+
|
|
5
|
+
This module contains different strategies for downloading PDFs from academic publishers.
|
|
6
|
+
Each strategy is tried in sequence until one succeeds.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Download strategies
|
|
10
|
+
from .chrome_pdf_viewer import try_download_chrome_pdf_viewer_async
|
|
11
|
+
from .direct_download import try_download_direct_async
|
|
12
|
+
from .response_body import try_download_response_body_async
|
|
13
|
+
from .manual_download_fallback import try_download_manual_async
|
|
14
|
+
|
|
15
|
+
# Manual download utilities
|
|
16
|
+
from .manual_download_utils import (
|
|
17
|
+
DownloadMonitorAndSync,
|
|
18
|
+
FlexibleFilenameGenerator,
|
|
19
|
+
show_stop_automation_button_async,
|
|
20
|
+
show_manual_download_button_async,
|
|
21
|
+
complete_manual_download_workflow_async,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
# Download strategies
|
|
26
|
+
"try_download_chrome_pdf_viewer_async",
|
|
27
|
+
"try_download_direct_async",
|
|
28
|
+
"try_download_response_body_async",
|
|
29
|
+
"try_download_manual_async",
|
|
30
|
+
# Manual download utilities
|
|
31
|
+
"DownloadMonitorAndSync",
|
|
32
|
+
"FlexibleFilenameGenerator",
|
|
33
|
+
"show_stop_automation_button_async",
|
|
34
|
+
"show_manual_download_button_async",
|
|
35
|
+
"complete_manual_download_workflow_async",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
# EOF
|