scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-13 08:18:35 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
|
|
14
|
+
"""Chrome PDF Viewer Download Strategy"""
|
|
15
|
+
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from playwright.async_api import BrowserContext
|
|
20
|
+
|
|
21
|
+
from scitex import logging
|
|
22
|
+
from scitex.browser.stealth import HumanBehavior
|
|
23
|
+
from scitex.scholar.browser import (
|
|
24
|
+
browser_logger,
|
|
25
|
+
click_center_async,
|
|
26
|
+
click_download_for_chrome_pdf_viewer_async,
|
|
27
|
+
detect_chrome_pdf_viewer_async,
|
|
28
|
+
show_grid_async,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def try_download_chrome_pdf_viewer_async(
    context: BrowserContext,
    pdf_url: str,
    output_path: Path,
    func_name: str = "ScholarPDFDownloader",
) -> Optional[Path]:
    """Download a PDF by driving Chrome's built-in PDF viewer.

    Opens ``pdf_url`` in a new page of ``context``, waits for the viewer to
    render, clicks the viewer's download control and then looks for the
    result either as a new entry in the library downloads directory or
    directly at ``output_path``.

    Args:
        context: Playwright browser context used to open the page.
        pdf_url: URL expected to render in the Chrome PDF viewer.
        output_path: Destination of the PDF; non-``Path`` values are
            converted with ``Path(...)``.
        func_name: Prefix used in every log message.

    Returns:
        ``output_path`` when a file larger than 1000 bytes ended up there,
        otherwise ``None``. Exceptions are logged and converted to ``None``.
    """
    page = None
    try:
        # Ensure output_path is Path object
        if not isinstance(output_path, Path):
            output_path = Path(output_path)

        logger.debug(f"{func_name}: Chrome PDF: Starting download")
        logger.debug(f" URL: {pdf_url} (type: {type(pdf_url)})")
        logger.debug(f" Output: {output_path} (type: {type(output_path)})")
        logger.debug(f" Downloader: {func_name} (type: {type(func_name)})")

        page = await context.new_page()

        # Get browser's download directory and capture files before download
        import time

        from scitex.scholar.config import ScholarConfig

        config = ScholarConfig()
        browser_downloads_dir = config.get_library_downloads_dir()

        def snapshot_downloads() -> set:
            # Entries currently present in the downloads directory
            # (empty set when the directory does not exist yet).
            if browser_downloads_dir.exists():
                return set(browser_downloads_dir.glob("*"))
            return set()

        files_before = snapshot_downloads()
        download_start_time = time.time()
        logger.info(
            f"{func_name}: Monitoring {browser_downloads_dir} ({len(files_before)} files)"
        )

        # Step 1: Navigate and wait for networkidle
        await browser_logger.debug(
            page, f"{func_name}: Chrome PDF: Navigating to URL..."
        )
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Navigating to {str(pdf_url)[:60]}...",
        )
        # Create HumanBehavior instance for delays
        human = HumanBehavior()
        await human.random_delay_async(1000, 2000, page=page)

        # Navigate and wait for initial networkidle
        await page.goto(str(pdf_url), wait_until="networkidle", timeout=60_000)
        await browser_logger.debug(
            page,
            f"{func_name}: Chrome PDF: Loaded page at {str(page.url)}",
        )
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Initial load at {str(page.url)[:80]}",
        )

        # Step 2: Wait for PDF rendering and any post-load network activity
        await browser_logger.debug(
            page,
            f"{func_name}: Chrome PDF: Waiting for PDF rendering...",
        )
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Waiting for PDF rendering (networkidle)...",
        )
        try:
            # Wait for network to be fully idle (catches post-load PDF requests)
            await page.wait_for_load_state("networkidle", timeout=30_000)
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: Network idle, PDF should be rendered",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: ✓ Network idle, PDF rendered",
            )
            await page.wait_for_timeout(2000)
        except Exception as e:
            # A networkidle timeout is tolerated: the viewer may still work.
            await browser_logger.debug(
                page,
                f"{func_name}: Network idle timeout (non-fatal): {e}",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: Network still active, continuing anyway",
            )
            await page.wait_for_timeout(2000)

        # Step 2.5: Extra wait for PDF viewer iframe/embed to fully load
        # Chrome PDF viewer can take additional time to initialize
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Waiting extra for PDF viewer to initialize (10s)...",
        )
        await page.wait_for_timeout(10000)  # Additional 10 seconds

        # Step 3: Detect PDF viewer
        await browser_logger.debug(
            page, f"{func_name}: Chrome PDF: Detecting PDF viewer..."
        )
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: Detecting PDF viewer..."
        )
        if not await detect_chrome_pdf_viewer_async(page):
            await browser_logger.warning(
                page,
                f"{func_name}: Chrome PDF: No PDF viewer detected at {str(page.url)}",
            )
            await browser_logger.warning(
                page,
                f"{func_name}: Chrome PDF: ✗ No PDF viewer detected!",
            )
            await page.wait_for_timeout(2000)  # Show message for 2s
            return None

        # Step 4: PDF viewer detected!
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: PDF viewer detected, attempting download...",
        )
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: ✓ PDF viewer detected!"
        )

        # Wait for PDF to fully render for visual feedback (especially in interactive mode)
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Waiting for PDF to render (5s)...",
        )
        await page.wait_for_timeout(5000)  # 5 seconds for visual confirmation
        await human.random_delay_async(1000, 2000, page=page)

        # Step 5: Show grid and click center
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: Showing grid overlay..."
        )
        await show_grid_async(page)
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: Clicking center of PDF..."
        )
        await click_center_async(page)

        # Step 6: Click download button
        await browser_logger.debug(
            page, f"{func_name}: Chrome PDF: Clicking download button..."
        )
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: Clicking download button..."
        )
        is_downloaded = await click_download_for_chrome_pdf_viewer_async(
            page, output_path
        )

        # Step 7: Wait for download to complete (use networkidle for patience)
        await browser_logger.debug(
            page,
            f"{func_name}: Chrome PDF: Waiting for download to complete...",
        )
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Waiting for download (networkidle up to 30s)...",
        )
        try:
            # Wait for any download-related network activity to complete
            await page.wait_for_load_state("networkidle", timeout=30_000)
            await browser_logger.debug(
                page,
                f"{func_name}: Chrome PDF: Network idle after download click",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: ✓ Download network activity complete",
            )
            await page.wait_for_timeout(2000)
        except Exception as e:
            await browser_logger.debug(
                page,
                f"{func_name}: Download networkidle timeout (non-fatal): {e}",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: Network timeout, checking file...",
            )
            await page.wait_for_timeout(2000)

        # Step 8: Check if file was actually downloaded
        # Check browser download directory for new files (even if Playwright event didn't fire)
        files_after = snapshot_downloads()
        new_files = files_after - files_before
        download_duration = time.time() - download_start_time

        logger.info(f"{func_name}: Checking download result...")
        logger.info(
            f"{func_name}: is_downloaded (Playwright): {is_downloaded}"
        )
        logger.info(f"{func_name}: output_path: {output_path}")
        logger.info(f"{func_name}: Files before: {len(files_before)}")
        logger.info(f"{func_name}: Files after: {len(files_after)}")
        logger.info(f"{func_name}: New files: {len(new_files)}")

        if new_files:
            # Found new file(s) in download directory; take the newest one
            downloaded_file = max(new_files, key=lambda p: p.stat().st_mtime)
            file_size = downloaded_file.stat().st_size
            file_size_mb = file_size / (1024 * 1024)

            logger.info(
                f"{func_name}: Found downloaded file: {downloaded_file.name}"
            )
            logger.info(f"{func_name}: Size: {file_size_mb:.2f} MB")
            logger.info(f"{func_name}: Duration: {download_duration:.1f}s")
            logger.info(f"{func_name}: Location: {downloaded_file}")

            if file_size > 1000:  # At least 1KB
                # Rename to desired output filename
                import shutil

                output_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(str(downloaded_file), str(output_path))

                await browser_logger.info(
                    page,
                    f"{func_name}: ✓ Downloaded {file_size_mb:.2f} MB in {download_duration:.1f}s",
                )
                await browser_logger.info(
                    page,
                    f"{func_name}: ✓ Saved to: {str(output_path)}",
                )
                logger.info(
                    f"{func_name}: Downloaded PDF: {output_path} ({file_size_mb:.2f} MB)"
                )
                await page.wait_for_timeout(3000)
                return output_path

        if is_downloaded and output_path.exists():
            file_size = output_path.stat().st_size
            file_size_mb = file_size / (1024 * 1024)
            if file_size > 1000:  # At least 1KB
                await browser_logger.info(
                    page,
                    f"{func_name}: ✓ Downloaded {file_size_mb:.2f} MB",
                )
                await browser_logger.info(
                    page,
                    f"{func_name}: ✓ Saved to: {str(output_path)}",
                )
                logger.info(
                    f"{func_name}: Downloaded PDF: {output_path} ({file_size_mb:.2f} MB)"
                )
                await page.wait_for_timeout(3000)  # Show info for 3s
                return output_path

            await browser_logger.warning(
                page,
                f"{func_name}: ✗ File too small: {file_size} bytes",
            )
            logger.warning(
                f"{func_name}: Download failed - file too small: {file_size} bytes"
            )
            await page.wait_for_timeout(2000)
            return None
        elif output_path.exists():
            # File exists but is_downloaded is False - still check file
            file_size = output_path.stat().st_size
            file_size_mb = file_size / (1024 * 1024)
            if file_size > 1000:
                await browser_logger.info(
                    page,
                    f"{func_name}: ✓ File found: {file_size_mb:.2f} MB",
                )
                await browser_logger.info(
                    page,
                    f"{func_name}: ✓ Saved to: {str(output_path)}",
                )
                logger.info(
                    f"{func_name}: Downloaded PDF: {output_path} ({file_size_mb:.2f} MB)"
                )
                await page.wait_for_timeout(3000)
                return output_path

        # Nothing usable was produced. Every success case has returned above,
        # so this path always reports failure, even when the click helper
        # returned a truthy value; a path to a missing or undersized file
        # must not be handed back to the caller.
        await browser_logger.warning(
            page, f"{func_name}: ✗ Download did not complete"
        )
        logger.warning(
            f"{func_name}: Download did not complete (is_downloaded={is_downloaded}, exists={output_path.exists()})"
        )
        await browser_logger.debug(
            page,
            f"{func_name}: Chrome PDF Viewer method did not work for {str(pdf_url)}",
        )
        await page.wait_for_timeout(2000)
        return None

    except Exception as ee:
        # Log error safely without browser popup (avoids recursive errors)
        error_msg = f"{func_name}: Chrome PDF Viewer failed: {type(ee).__name__}: {str(ee)}"
        logger.error(error_msg)
        logger.debug(f" URL: {pdf_url}")
        logger.debug(f" Output: {output_path}")

        if page is not None:
            try:
                await browser_logger.info(
                    page,
                    f"{func_name}: Chrome PDF: ✗ EXCEPTION: {type(ee).__name__}",
                )
                await page.wait_for_timeout(2000)  # Show error for 2s
            except Exception as popup_error:
                logger.debug(
                    f"{func_name}: Could not show error popup: {popup_error}"
                )
        return None

    finally:
        # Single cleanup point: the page is closed exactly once, after all
        # page-bound logging, and a failing close cannot change the result.
        if page is not None:
            try:
                await page.close()
            except Exception as close_error:
                logger.debug(
                    f"{func_name}: Error closing page: {close_error}"
                )
|
|
375
|
+
|
|
376
|
+
# EOF
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-13 07:59:52 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/strategies/direct_download.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/pdf_download/strategies/direct_download.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
"""Direct Download Strategy"""
|
|
14
|
+
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
from playwright.async_api import BrowserContext
|
|
19
|
+
|
|
20
|
+
from scitex import logging
|
|
21
|
+
from scitex.scholar.browser import browser_logger
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def try_download_direct_async(
    context: BrowserContext,
    pdf_url: str,
    output_path: Path,
    func_name: str = "try_download_direct_async",
) -> Optional[Path]:
    """Handle direct download that triggers ERR_ABORTED.

    Opens ``pdf_url`` in a new page with a ``download`` event handler that
    saves the download to ``output_path``. A navigation error containing
    ``ERR_ABORTED`` is treated as a sign that a download may have started,
    and the function then waits five seconds before checking the result.

    Args:
        context: Playwright browser context used to open the page.
        pdf_url: URL to navigate to.
        output_path: Destination of the PDF; converted with ``Path(...)``.
        func_name: Prefix used in every log message.

    Returns:
        ``output_path`` when the download handler ran and the file exists,
        otherwise ``None``. Exceptions are logged and converted to ``None``.
    """
    page = None
    try:
        # Accept str inputs as the sibling Chrome-viewer strategy does;
        # ``.exists()`` and slicing below require Path / str respectively.
        output_path = Path(output_path)
        pdf_url = str(pdf_url)

        page = await context.new_page()
        await browser_logger.info(
            page, f"{func_name}: Trying direct download from {pdf_url}"
        )

        download_occurred = False

        async def handle_download(download):
            nonlocal download_occurred
            await download.save_as(output_path)
            # Only flag success once the file has actually been saved.
            download_occurred = True

        page.on("download", handle_download)

        # Step 1: Navigate
        await browser_logger.info(
            page,
            f"{func_name}: Direct Download: Navigating to {pdf_url[:60]}...",
        )
        try:
            await page.goto(pdf_url, wait_until="load", timeout=60_000)
            await browser_logger.info(
                page,
                f"{func_name}: Direct Download: Loaded at {page.url[:80]}",
            )
        except Exception as ee:
            if "ERR_ABORTED" not in str(ee):
                await browser_logger.info(
                    page,
                    f"{func_name}: Direct Download: ✗ Error: {str(ee)[:80]}",
                )
                await page.wait_for_timeout(2000)
                raise  # bare raise keeps the original traceback

            await browser_logger.info(
                page,
                f"{func_name}: Direct Download: ERR_ABORTED detected - likely direct download",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Direct Download: ERR_ABORTED (download may have started)",
            )
            # Give the download handler time to save the file.
            await page.wait_for_timeout(5_000)

        # Step 2: Check result
        if download_occurred and output_path.exists():
            size_MiB = output_path.stat().st_size / 1024 / 1024
            await browser_logger.info(
                page,
                f"{func_name}: Direct download: from {pdf_url} to {output_path} ({size_MiB:.2f} MiB)",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Direct Download: ✓ Downloaded {size_MiB:.2f} MB",
            )
            await page.wait_for_timeout(2000)
            return output_path

        await browser_logger.debug(
            page,
            f"{func_name}: Direct download: No download event occurred",
        )
        await browser_logger.info(
            page,
            f"{func_name}: Direct Download: ✗ No download event occurred",
        )
        await page.wait_for_timeout(2000)
        return None

    except Exception as ee:
        failure_msg = f"{func_name}: Direct download failed: {ee}"
        if page is None:
            # The page could not even be created; report through the plain
            # logger so the failure is not silently dropped.
            logger.warning(failure_msg)
        else:
            try:
                # Page-bound logging may itself fail (e.g. page crashed), so
                # it is kept inside the guard and can never escape.
                await browser_logger.warning(page, failure_msg)
                await browser_logger.info(
                    page,
                    f"{func_name}: Direct Download: ✗ EXCEPTION: {str(ee)[:100]}",
                )
                await page.wait_for_timeout(2000)
            except Exception as popup_error:
                logger.warning(failure_msg)
                logger.debug(
                    f"{func_name}: Could not show error popup: {popup_error}"
                )
        return None

    finally:
        # Single cleanup point for every exit path.
        if page is not None:
            try:
                await page.close()
            except Exception as close_error:
                logger.debug(
                    f"{func_name}: Error closing page: {close_error}"
                )
|
|
130
|
+
|
|
131
|
+
# EOF
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-13 08:00:08 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/strategies/manual_download_fallback.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/pdf_download/strategies/manual_download_fallback.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
"""Manual Download Fallback Strategy"""
|
|
14
|
+
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
from playwright.async_api import BrowserContext
|
|
19
|
+
|
|
20
|
+
from scitex import logging
|
|
21
|
+
from scitex.scholar import ScholarConfig
|
|
22
|
+
from scitex.scholar.browser import browser_logger
|
|
23
|
+
from scitex.scholar.pdf_download.strategies.manual_download_utils import (
|
|
24
|
+
DownloadMonitorAndSync,
|
|
25
|
+
complete_manual_download_workflow_async,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def try_download_manual_async(
    context: BrowserContext,
    pdf_url: str,
    output_path: Path,
    func_name: str = "try_download_manual_async",
    config: Optional[ScholarConfig] = None,
    doi: Optional[str] = None,
) -> Optional[Path]:
    """Manual download fallback strategy.

    Opens PDF URL in browser, shows instructions, and monitors downloads directory.
    When user manually downloads the PDF, it is detected, synced to the
    library via ``DownloadMonitorAndSync`` and copied to ``output_path``.

    NOTE: This method should NOT check the _scitex_is_manual_mode flag because
    it IS the manual mode implementation!

    Args:
        context: Browser context
        pdf_url: URL of the PDF to download
        output_path: Where to save the final PDF (converted with ``Path(...)``)
        func_name: Name for logging
        config: Scholar configuration; a default ``ScholarConfig()`` is
            created when omitted
        doi: Optional DOI for filename generation; when omitted it is
            extracted from ``pdf_url`` if the URL contains ``doi.org/`` or
            ``/doi/10.<digits>/...``

    Returns:
        Path to downloaded file, or None if failed
    """
    config = config or ScholarConfig()
    page = None

    try:
        # ``output_path.parent`` below needs a Path; accept str as well.
        output_path = Path(output_path)

        # Create new page and navigate to PDF
        page = await context.new_page()

        await browser_logger.info(
            page,
            f"{func_name}: Opening PDF for manual download...",
        )

        await page.goto(pdf_url, timeout=30000, wait_until="domcontentloaded")

        await browser_logger.info(
            page,
            f"{func_name}: Please download the PDF manually from this page",
        )

        # Setup monitoring
        downloads_dir = config.get_library_downloads_dir()
        master_dir = config.get_library_master_dir()
        monitor = DownloadMonitorAndSync(downloads_dir, master_dir)

        # Progress logger
        def log_progress(msg: str):
            logger.info(f"{func_name}: {msg}")

        # Extract DOI from URL if not provided
        if not doi:
            if "doi.org/" in pdf_url:
                # Strip query string and fragment from the DOI tail.
                doi = pdf_url.split("doi.org/")[-1].split("?")[0].split("#")[0]
            elif "/doi/" in pdf_url:
                # Try to extract DOI from URL like /doi/10.1212/...
                import re

                match = re.search(r"/doi/(10\.\d+/[^\s?#]+)", pdf_url)
                if match:
                    doi = match.group(1)

        # Show instructions and start monitoring
        log_progress(f"Monitoring {downloads_dir} for new PDFs...")
        log_progress("Please download the PDF manually from the browser")

        # Monitor for download (2 minutes timeout to prevent process accumulation)
        temp_file = await monitor.monitor_for_new_download_async(
            timeout_sec=120,  # 2 minutes
            check_interval_sec=1.0,
            logger_func=log_progress,
        )

        if not temp_file:
            await browser_logger.error(
                page,
                f"{func_name}: No new PDF detected in 120 seconds",
            )
            logger.error(f"{func_name}: Download monitoring timeout")
            return None

        await browser_logger.info(
            page,
            f"{func_name}: Detected: {temp_file.name} ({temp_file.stat().st_size / 1e6:.1f} MB)",
        )

        # Sync to library
        final_path = monitor.sync_to_final_destination(
            temp_file,
            doi=doi,
            url=pdf_url,
            content_type="main",
        )

        # Validate the sync result BEFORE touching ``final_path.name``; a
        # ``None`` result previously surfaced as an opaque AttributeError.
        if not final_path or not final_path.exists():
            logger.error(
                f"{func_name}: Library sync did not produce a file: {final_path}"
            )
            return None

        await browser_logger.info(
            page,
            f"{func_name}: Synced to library: {final_path.name}",
        )

        # Copy to requested output path
        import shutil

        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(str(final_path), str(output_path))

        await browser_logger.info(
            page,
            f"{func_name}: Manual download complete!",
        )

        logger.info(f"{func_name}: Manual download saved to {output_path}")
        return output_path

    except Exception as e:
        logger.error(f"{func_name}: Manual download failed: {e}")
        if page is not None:
            try:
                await browser_logger.error(
                    page,
                    f"{func_name}: Error: {type(e).__name__}",
                )
            except Exception as popup_error:
                # Best effort only, but leave a trace instead of ``pass``.
                logger.debug(
                    f"{func_name}: Could not show error popup: {popup_error}"
                )
        return None

    finally:
        # Single cleanup point: the page is closed on every exit path, even
        # when the error popup above could not be shown.
        if page is not None:
            try:
                await page.close()
            except Exception as close_error:
                logger.debug(
                    f"{func_name}: Error closing page: {close_error}"
                )
|
|
166
|
+
|
|
167
|
+
# EOF
|