scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,686 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Time-stamp: "2025-08-01 13:00:00"
|
|
4
|
+
# Author: Yusuke Watanabe
|
|
5
|
+
# File: _ShibbolethAuthenticator.py
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
Shibboleth authentication for institutional access to academic papers.
|
|
9
|
+
|
|
10
|
+
This module provides authentication through Shibboleth single sign-on
|
|
11
|
+
to enable legal PDF downloads via institutional subscriptions.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import json
|
|
16
|
+
import re
|
|
17
|
+
from datetime import datetime, timedelta
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
20
|
+
from urllib.parse import urlparse, urljoin, parse_qs
|
|
21
|
+
|
|
22
|
+
from scitex import logging
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from playwright.async_api import async_playwright, Page, Browser
|
|
26
|
+
except ImportError:
|
|
27
|
+
async_playwright = None
|
|
28
|
+
Page = None
|
|
29
|
+
Browser = None
|
|
30
|
+
|
|
31
|
+
from scitex.errors import ScholarError
|
|
32
|
+
from .BaseAuthenticator import BaseAuthenticator
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ShibbolethError(ScholarError):
    """Signal that a Shibboleth sign-in attempt did not succeed."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ShibbolethAuthenticator(BaseAuthenticator):
|
|
43
|
+
"""
|
|
44
|
+
Handles Shibboleth authentication for institutional access.
|
|
45
|
+
|
|
46
|
+
Shibboleth is a single sign-on (SSO) system that provides federated
|
|
47
|
+
identity management and access control for academic resources.
|
|
48
|
+
|
|
49
|
+
This authenticator:
|
|
50
|
+
1. Authenticates via institutional Identity Provider (IdP)
|
|
51
|
+
2. Handles SAML assertions and attribute exchange
|
|
52
|
+
3. Maintains authenticated sessions
|
|
53
|
+
4. Returns session cookies for use by download strategies
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
def __init__(
    self,
    institution: Optional[str] = None,
    idp_url: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    entity_id: Optional[str] = None,
    cache_dir: Optional[Path] = None,
    timeout: int = 120,
    debug_mode: bool = False,
    **kwargs,
):
    """
    Initialize Shibboleth authenticator.

    Args:
        institution: Institution name (e.g., 'University of Example')
        idp_url: Identity Provider URL
        username: Username for authentication
        password: Password for authentication (kept on the instance only;
            it is not included in the config passed to the base class)
        entity_id: Entity ID for the institution
        cache_dir: Directory for session cache; a ``str`` is accepted and
            converted to ``Path``. Defaults to
            ``~/.scitex/scholar/shibboleth_sessions``.
        timeout: Authentication timeout in seconds (stored on the
            instance; not read by any method of this class)
        debug_mode: Enable debug logging; also launches the browser
            non-headless
        **kwargs: Accepted for signature compatibility and ignored
    """
    super().__init__(
        config={
            "institution": institution,
            "idp_url": idp_url,
            "username": username,
            "entity_id": entity_id,
            "debug_mode": debug_mode,
        }
    )

    self.institution = institution
    self.idp_url = idp_url
    self.username = username
    self.password = password
    self.entity_id = entity_id
    self.timeout = timeout
    self.debug_mode = debug_mode

    # Session cache directory. Coerce to Path so that a plain string
    # argument works with the ``/`` and ``.mkdir`` calls below.
    if cache_dir:
        self.cache_dir = Path(cache_dir).expanduser()
    else:
        self.cache_dir = (
            Path.home() / ".scitex" / "scholar" / "shibboleth_sessions"
        )
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Session file path (one file per institution/username combination)
    self.session_file = (
        self.cache_dir / f"session_{self._get_session_async_key()}.json"
    )

    # Session management
    self._cookies: Dict[str, str] = {}
    self._full_cookies: List[Dict[str, Any]] = []
    self._session_expiry: Optional[datetime] = None
    self._saml_attributes: Dict[str, Any] = {}

    # Common Shibboleth endpoints and patterns
    self.wayf_urls = [
        "https://wayf.surfnet.nl",  # Dutch federation
        "https://discovery.eduserv.org.uk",  # UK federation
        "https://wayf.incommonfederation.org",  # InCommon (US)
        "https://ds.aai.switch.ch",  # Swiss federation
        "https://discovery.shibboleth.net",  # Generic discovery
    ]

    # Common IdP login patterns, tried in order (most specific first)
    self.idp_patterns = {
        "username_field": [
            "input[name='j_username']",
            "input[name='username']",
            "input[name='user']",
            "input[id*='username']",
            "input[type='text']",
        ],
        "password_field": [
            "input[name='j_password']",
            "input[name='password']",
            "input[name='pass']",
            "input[id*='password']",
            "input[type='password']",
        ],
        "submit_button": [
            "button[type='submit']",
            "input[type='submit']",
            "button[name='_eventId_proceed']",
            "button:has-text('Login')",
            "button:has-text('Sign in')",
        ],
    }

    # Load existing session
    self._load_session()
|
|
150
|
+
|
|
151
|
+
def _get_session_async_key(self) -> str:
    """Generate a file-name-safe session key for this configuration.

    The key is built from the institution and username (whichever are
    set), joined by ``_``. Every character other than word characters,
    ``.``, ``@``, ``+`` and ``-`` is replaced by ``_`` so that the key can
    be embedded in a file name without introducing path separators.

    Returns:
        The key, or ``"default"`` when neither value is set.
    """

    def _safe(part: str) -> str:
        # Spaces map to "_" exactly as before; separators such as "/" or
        # "\\" are neutralised as well.
        return re.sub(r"[^\w.@+-]", "_", part)

    key_parts = [_safe(part) for part in (self.institution, self.username) if part]
    return "_".join(key_parts) or "default"
|
|
159
|
+
|
|
160
|
+
def _load_session(self) -> None:
    """Load an existing session from the cache file, if one is usable.

    A cached session is restored only when the file has an ``expiry``
    timestamp that lies in the future. An expired cache file is deleted.
    A missing file is not an error; any other failure is logged as a
    warning and leaves the in-memory session untouched.
    """
    try:
        # Explicit encoding: do not depend on the platform locale.
        with open(self.session_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Check if session is expired
        expiry_str = data.get("expiry")
        if not expiry_str:
            return

        expiry = datetime.fromisoformat(expiry_str)
        if expiry > datetime.now():
            self._cookies = data.get("cookies", {})
            self._full_cookies = data.get("full_cookies", [])
            self._session_expiry = expiry
            self._saml_attributes = data.get("saml_attributes", {})
            logger.info(f"{self.name}: Loaded existing Shibboleth session")
        else:
            logger.info(f"{self.name}: Existing Shibboleth session expired")
            # missing_ok: another process may have removed it already
            self.session_file.unlink(missing_ok=True)
    except FileNotFoundError:
        # No cached session yet
        return
    except Exception as e:
        logger.warning(f"Failed to load session: {e}")
|
|
182
|
+
|
|
183
|
+
def _save_session_async(self) -> None:
    """Save the current session to the cache file.

    Despite its name this method is synchronous. Nothing is written unless
    both cookies and an expiry time are present. Failures are logged as a
    warning rather than raised.
    """
    if not (self._cookies and self._session_expiry):
        return

    try:
        data = {
            "cookies": self._cookies,
            "full_cookies": self._full_cookies,
            "expiry": self._session_expiry.isoformat(),
            "institution": self.institution,
            "username": self.username,
            "saml_attributes": self._saml_attributes,
        }

        # The cookies grant access to the authenticated session, so keep
        # the cache file readable by the owner only.
        self.session_file.touch(mode=0o600, exist_ok=True)
        try:
            self.session_file.chmod(0o600)
        except OSError as e:
            # Best effort: some filesystems do not support chmod.
            logger.debug(f"Could not restrict session file permissions: {e}")

        with open(self.session_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        logger.info(f"{self.name}: Saved Shibboleth session")
    except Exception as e:
        logger.warning(f"Failed to save session: {e}")
|
|
200
|
+
|
|
201
|
+
async def authenticate_async(self, force: bool = False, **kwargs) -> dict:
    """
    Authenticate with Shibboleth and return session data.

    The Shibboleth authentication flow typically involves:
    1. Accessing a protected resource
    2. Redirect to WAYF (Where Are You From) service
    3. Selecting institution
    4. Redirect to institution's IdP
    5. Authentication at IdP
    6. SAML assertion sent back to Service Provider
    7. Access granted to resource

    Args:
        force: Force re-authentication even if session exists
        **kwargs: Additional parameters; ``resource_url`` selects the page
            that is opened to trigger the flow

    Returns:
        Dictionary with keys ``cookies``, ``full_cookies`` and
        ``saml_attributes``

    Raises:
        ShibbolethError: If Playwright is unavailable or any step of the
            browser flow fails
    """
    if async_playwright is None:
        raise ShibbolethError(
            "Playwright is required for Shibboleth authentication. "
            "Install with: pip install playwright && playwright install chromium"
        )

    # Check existing session
    if not force and await self.is_authenticate_async():
        logger.info(f"{self.name}: Using existing Shibboleth session")
        return {
            "cookies": self._cookies,
            "full_cookies": self._full_cookies,
            "saml_attributes": self._saml_attributes,
        }

    # Get resource URL to access (triggers Shibboleth flow)
    resource_url = kwargs.get("resource_url", "https://www.nature.com/siteindex")

    logger.info(f"Authenticating with Shibboleth for {self.institution or 'institution'}")

    # True once the URL no longer looks like an IdP / WAYF / discovery page
    left_login_pages_js = """() => {
        return !window.location.href.includes('idp') &&
               !window.location.href.includes('wayf') &&
               !window.location.href.includes('discovery');
    }"""

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=not self.debug_mode,
            args=['--disable-blink-features=AutomationControlled'],
        )

        try:
            context = await browser.new_context(
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            )
            page = await context.new_page()

            # Step 1: Access protected resource
            await page.goto(resource_url, wait_until="networkidle")

            # Step 2: Look for institutional login option
            login_found = await self._find_institutional_login_async(page)

            if login_found:
                # Click institutional login
                await login_found.click()
                await page.wait_for_load_state("networkidle")

            # Step 3: Handle WAYF/Discovery Service
            wayf_handled = await self._handle_wayf_selection_async(page)

            if not wayf_handled and not self.idp_url:
                raise ShibbolethError("Could not find institution selection page")

            # Step 4: Handle IdP login (the same handler is used whether
            # or not the current URL matches the configured idp_url)
            await self._handle_idp_login_async(page)

            # Step 5: Wait for redirect back to resource
            try:
                await page.wait_for_function(left_login_pages_js, timeout=30000)
            except Exception as e:
                # Continue anyway - might still be authenticated.
                # ``except Exception`` (not bare) so task cancellation and
                # KeyboardInterrupt are not swallowed.
                logger.debug(f"Redirect wait did not complete: {e}")

            # Extract cookies and SAML attributes
            cookies = await context.cookies()

            # Try to extract SAML attributes from page or headers
            self._saml_attributes = await self._extract_saml_attributes_async(page)

            # Convert cookies
            self._cookies = {c["name"]: c["value"] for c in cookies}
            self._full_cookies = cookies

            # Set session expiry (typically 8-12 hours for Shibboleth)
            self._session_expiry = datetime.now() + timedelta(hours=8)

            # Save session
            self._save_session_async()

            logger.info(f"{self.name}: Shibboleth authentication successful")
            return {
                "cookies": self._cookies,
                "full_cookies": self._full_cookies,
                "saml_attributes": self._saml_attributes,
            }

        except Exception as e:
            logger.error(f"Shibboleth authentication failed: {e}")
            # Chain the cause so the original traceback is preserved
            raise ShibbolethError(f"Authentication failed: {str(e)}") from e
        finally:
            await browser.close()
|
|
322
|
+
|
|
323
|
+
async def _find_institutional_login_async(self, page: Page) -> Optional[Any]:
    """Return the first visible institutional-login link or button.

    Candidate selectors are probed in a fixed order; for each one only the
    first matching element is considered. Returns ``None`` when no
    candidate yields a visible element.
    """
    candidate_selectors = (
        "a:has-text('Institutional')",
        "a:has-text('Institution')",
        "a:has-text('Shibboleth')",
        "a:has-text('Federation')",
        "a:has-text('Access through your institution')",
        "button:has-text('Institutional')",
        "a[href*='shibboleth']",
        "a[href*='wayf']",
        "a[href*='idp']",
    )

    for css in candidate_selectors:
        handle = await page.query_selector(css)
        if not handle:
            continue
        if not await handle.is_visible():
            continue
        logger.debug(f"Found institutional login: {css}")
        return handle

    return None
|
|
344
|
+
|
|
345
|
+
async def _handle_wayf_selection_async(self, page: "Page") -> bool:
    """Handle WAYF/Discovery Service institution selection.

    Args:
        page: The browser page currently being driven.

    Returns:
        True when an institution entry was found and selected/clicked;
        False when the page does not look like a WAYF page or no matching
        entry was found.
    """
    # Check if we're on a WAYF page
    url_indicators = ("wayf", "discovery", "ds.", "where are you from")
    # "ds." is a host-name prefix (e.g. ds.aai.switch.ch). As a substring
    # of the page HTML it matches ordinary text such as "words." or
    # "ids.", so it is only tested against the URL.
    content_indicators = ("wayf", "discovery", "where are you from")

    current_url = page.url.lower()
    page_content = (await page.content()).lower()

    is_wayf = any(ind in current_url for ind in url_indicators) or any(
        ind in page_content for ind in content_indicators
    )

    if not is_wayf:
        return False

    logger.info(f"{self.name}: Detected WAYF/Discovery Service page")

    # Try to find institution selector
    if self.institution:
        # Escape backslashes and single quotes so names such as
        # "King's College" do not break the quoted selector argument.
        name = self.institution.replace("\\", "\\\\").replace("'", "\\'")

        # Search for institution in dropdown/list
        selectors = [
            f"option:has-text('{name}')",
            f"a:has-text('{name}')",
            f"li:has-text('{name}')",
        ]

        for selector in selectors:
            element = await page.query_selector(selector)
            if not element:
                continue

            # If it's an option, select it
            if await element.evaluate("el => el.tagName") == "OPTION":
                # closest('select') also works when the option is nested
                # in an <optgroup>
                select = await element.evaluate_handle("el => el.closest('select')")
                await select.select_option(value=await element.get_attribute("value"))
                # Find and click submit button
                submit = await page.query_selector("button[type='submit'], input[type='submit']")
                if submit:
                    await submit.click()
            else:
                # Direct click
                await element.click()

            await page.wait_for_load_state("networkidle")
            return True

    # If automated selection fails, might need manual intervention
    if self.debug_mode:
        logger.info(f"{self.name}: Please select your institution manually")
        await asyncio.sleep(30)  # Give time for manual selection

    return False
|
|
393
|
+
|
|
394
|
+
async def _handle_idp_login_async(self, page: "Page") -> None:
    """Fill in and submit the login form at the Identity Provider.

    Missing credentials are requested interactively on the terminal.
    Username, password and submit controls are located by trying the
    selector lists in ``self.idp_patterns`` in order and taking the first
    visible match.

    Args:
        page: The browser page showing the IdP login form.

    Raises:
        ShibbolethError: If no visible username or password field is found.
    """
    logger.info(f"{self.name}: Handling IdP login page")

    # Get credentials
    # NOTE(review): input()/getpass() block the event loop while waiting
    # for the user - acceptable for an interactive, single-task flow.
    if not self.username:
        self.username = input("Shibboleth username: ")
    if not self.password:
        import getpass
        self.password = getpass.getpass("Shibboleth password: ")

    async def first_visible(selectors):
        """Return the first visible element matching any selector, else None."""
        for selector in selectors:
            candidate = await page.query_selector(selector)
            if candidate and await candidate.is_visible():
                return candidate
        return None

    # Try each username field pattern
    username_field = await first_visible(self.idp_patterns["username_field"])
    if username_field is None:
        raise ShibbolethError("Could not find username field")
    await username_field.fill(self.username)

    # Try each password field pattern
    password_field = await first_visible(self.idp_patterns["password_field"])
    if password_field is None:
        raise ShibbolethError("Could not find password field")
    await password_field.fill(self.password)

    # Try each submit button pattern
    submit_button = await first_visible(self.idp_patterns["submit_button"])
    if submit_button is not None:
        await submit_button.click()
    else:
        # No recognisable submit control: submit the form from the
        # password field instead of silently leaving it unsubmitted.
        await password_field.press("Enter")

    # Wait for authentication to complete
    await page.wait_for_load_state("networkidle")
|
|
438
|
+
|
|
439
|
+
async def _extract_saml_attributes_async(self, page: "Page") -> Dict[str, Any]:
    """Try to extract SAML attributes from the page.

    First collects ``<meta name="shib-...">`` tags (the ``shib-`` prefix is
    stripped from the attribute name). If none are present and a username
    is known, falls back to the first email address in the page body that
    contains the username and reports it as ``eppn``.

    Extraction is best effort: any error is logged at debug level and
    whatever was collected so far is returned.

    Args:
        page: The browser page to inspect.

    Returns:
        Mapping of attribute name to value; empty when nothing was found.
    """
    attributes: Dict[str, Any] = {}

    try:
        # Some SPs expose attributes in meta tags
        meta_tags = await page.query_selector_all("meta[name^='shib-']")
        for tag in meta_tags:
            name = await tag.get_attribute("name")
            content = await tag.get_attribute("content")
            if name and content:
                # Strip only the leading prefix, not later occurrences
                attributes[name.removeprefix("shib-")] = content

        # Check for common attribute patterns in page
        if not attributes and self.username:
            # Try to find email/eppn
            email_pattern = r'[\w._%+-]+@[\w.-]+\.[a-zA-Z]{2,}'
            # text_content() requires a selector; read the whole body
            page_text = await page.text_content("body")
            if page_text:
                for email in re.findall(email_pattern, page_text):
                    if self.username in email:
                        attributes["eppn"] = email
                        break

    except Exception as e:
        logger.debug(f"Could not extract SAML attributes: {e}")

    return attributes
|
|
467
|
+
|
|
468
|
+
async def is_authenticate_async(self, verify_live: bool = False) -> bool:
    """
    Check if we have a valid authenticated session.

    Args:
        verify_live: If True, additionally opens a headless browser with
            the stored cookies and checks that a test page does not
            redirect to a login/WAYF/IdP URL

    Returns:
        True if authenticated, False otherwise (including when the live
        check itself fails)
    """
    # Check if we have session data
    if not self._cookies or not self._session_expiry:
        return False

    # Check if session is expired
    if datetime.now() > self._session_expiry:
        logger.info(f"{self.name}: Shibboleth session expired")
        return False

    if not verify_live:
        return True

    # If requested, verify session is still valid
    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                context = await browser.new_context()

                # Add cookies
                await context.add_cookies(self._full_cookies)

                page = await context.new_page()

                # Try to access a protected resource
                test_url = "https://www.nature.com/nature"
                await page.goto(test_url, wait_until="networkidle")

                # Check if we're redirected to login
                is_valid = not any(
                    ind in page.url.lower()
                    for ind in ["login", "wayf", "idp", "shibboleth"]
                )
            finally:
                # Close the browser even when navigation fails
                await browser.close()
    except Exception as e:
        logger.warning(f"Failed to verify session: {e}")
        return False

    if not is_valid:
        logger.info(f"{self.name}: Shibboleth session no longer valid")
        self._cookies = {}
        self._full_cookies = []
        self._session_expiry = None

    return is_valid
|
|
524
|
+
|
|
525
|
+
async def get_auth_headers_async(self) -> Dict[str, str]:
    """
    Build authentication headers from the stored SAML attributes.

    Shibboleth normally relies on cookies, so this only emits
    ``X-Shibboleth-eppn`` and ``X-Shibboleth-affiliation`` when the
    corresponding attribute is present. Returns an empty dict otherwise.
    """
    saml = self._saml_attributes
    if not saml:
        return {}

    # Some Shibboleth deployments use these headers
    forwarded = ("eppn", "affiliation")
    return {
        f"X-Shibboleth-{attr}": saml[attr]
        for attr in forwarded
        if attr in saml
    }
|
|
544
|
+
|
|
545
|
+
async def get_auth_cookies_async(self) -> List[Dict[str, Any]]:
    """Get authentication cookies.

    Returns:
        The full cookie records captured during authentication.

    Raises:
        ShibbolethError: If there is no valid session.
    """
    if not await self.is_authenticate_async():
        raise ShibbolethError("Not authenticated")
    return self._full_cookies
|
|
550
|
+
|
|
551
|
+
async def logout_async(self) -> None:
    """
    Log out and clear authentication state.

    Only local state is cleared: in-memory cookies, expiry and SAML
    attributes, plus the cached session file. No request is sent to the
    IdP or to any Service Provider.

    Note: A full Shibboleth logout is complex as it involves:
    - Local application logout
    - IdP logout
    - Optional Single Logout (SLO) to all SPs
    """
    self._cookies = {}
    self._full_cookies = []
    self._session_expiry = None
    self._saml_attributes = {}

    # Remove session file; missing_ok avoids a race between an existence
    # check and the delete.
    self.session_file.unlink(missing_ok=True)

    logger.info(f"{self.name}: Logged out from Shibboleth")
|
|
570
|
+
|
|
571
|
+
async def get_session_info_async(self) -> Dict[str, Any]:
    """Summarise the current session as a plain dictionary.

    The result carries the authentication flag (under the key
    ``"authenticate_async"``), the configured identity fields, the stored
    SAML attributes, the expiry as an ISO-8601 string (or ``None``) and
    the number of stored cookies.
    """
    session_ok = await self.is_authenticate_async()

    expiry = self._session_expiry
    if expiry:
        expiry_text = expiry.isoformat()
    else:
        expiry_text = None

    summary = {
        "authenticate_async": session_ok,
        "provider": "Shibboleth",
        "institution": self.institution,
        "username": self.username,
        "idp_url": self.idp_url,
        "entity_id": self.entity_id,
        "saml_attributes": self._saml_attributes,
        "session_expiry": expiry_text,
        "cookies_count": len(self._cookies),
    }
    return summary
|
|
586
|
+
|
|
587
|
+
def detect_shibboleth_sp(self, url: str) -> Optional[Dict[str, str]]:
    """
    Detect if a URL belongs to a known Shibboleth-enabled Service Provider.

    The URL's host name is compared, case-insensitively and on whole
    domain labels, against a small built-in table of publishers. Matching
    on label boundaries means ``www.jstor.org`` and proxy-style hosts such
    as ``www.jstor.org.proxy.example.edu`` match ``jstor.org``, while
    ``notjstor.org`` does not.

    Args:
        url: URL to check

    Returns:
        Dictionary with ``sp_type`` and ``wayf`` if detected, None
        otherwise (including when the URL has no host component)
    """
    # hostname (unlike netloc) is lower-cased and excludes port/userinfo
    host = urlparse(url).hostname
    if not host:
        return None

    # Known Service Providers and their WAYF entry points
    indicators = {
        "jstor.org": {
            "sp_type": "jstor",
            "wayf": "https://www.jstor.org/wayf",
        },
        "projectmuse.org": {
            "sp_type": "muse",
            "wayf": "https://muse.jhu.edu/wayf",
        },
        "ebscohost.com": {
            "sp_type": "ebsco",
            "wayf": "https://search.ebscohost.com/wayf",
        },
        "ieee.org": {
            "sp_type": "ieee",
            "wayf": "https://ieeexplore.ieee.org/servlet/wayf",
        },
        "sciencedirect.com": {
            "sp_type": "elsevier",
            "wayf": "https://www.sciencedirect.com/wayf",
        },
    }

    # Surround with dots so a pattern only matches complete labels
    dotted_host = f".{host}."
    for domain_pattern, info in indicators.items():
        if f".{domain_pattern}." in dotted_host:
            return info

    return None
|
|
639
|
+
|
|
640
|
+
def get_wayf_url(self, sp_url: str) -> Optional[str]:
    """
    Resolve the WAYF (Where Are You From) URL for a Service Provider.

    Args:
        sp_url: Service Provider URL

    Returns:
        The provider-specific WAYF URL when ``detect_shibboleth_sp``
        reports one; otherwise the first entry of ``self.wayf_urls``
    """
    detected = self.detect_shibboleth_sp(sp_url)

    if not detected or "wayf" not in detected:
        # No provider-specific entry: fall back to the first generic
        # WAYF. Picking by the user's region would need configuration.
        return self.wayf_urls[0]

    return detected["wayf"]
|
|
657
|
+
|
|
658
|
+
async def create_authenticate_async_browser(self) -> tuple[Browser, Any]:
    """
    Create a browser instance with Shibboleth authentication.

    Authenticates first when no valid session exists. The Playwright
    driver started here is kept on ``self._playwright`` so the caller can
    ``await`` its ``stop()`` after closing the browser; if launching the
    browser or context fails, the driver is stopped before the error
    propagates.

    Returns:
        Tuple of (browser, context) with authentication cookies set

    Raises:
        ShibbolethError: If Playwright is not installed, or if
            authentication fails
    """
    # Fail fast before attempting any authentication work
    if async_playwright is None:
        raise ShibbolethError("Playwright is required")

    if not await self.is_authenticate_async():
        await self.authenticate_async()

    p = await async_playwright().start()
    try:
        browser = await p.chromium.launch(
            headless=not self.debug_mode,
            args=['--disable-blink-features=AutomationControlled'],
        )

        context = await browser.new_context(
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        )

        # Add authentication cookies
        await context.add_cookies(self._full_cookies)
    except BaseException:
        # Do not leak the driver process when setup fails or is cancelled
        await p.stop()
        raise

    # Keep a handle so the driver can be stopped by the caller later
    self._playwright = p

    return browser, context
|
|
685
|
+
|
|
686
|
+
# EOF
|