scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,732 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-11 07:51:13 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/browser/utils/wait_redirects.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/browser/utils/wait_redirects.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
|
|
14
|
+
__FILE__ = __file__
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
Enhanced redirect waiter that handles authentication endpoints properly.
|
|
18
|
+
|
|
19
|
+
This version continues waiting even after receiving 200 status from auth endpoints,
|
|
20
|
+
as they often perform client-side redirects.
|
|
21
|
+
|
|
22
|
+
Auth patterns are loaded from Scholar config (authentication.auth_endpoint_patterns).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import asyncio
|
|
26
|
+
from typing import Dict, List
|
|
27
|
+
from urllib.parse import urlparse
|
|
28
|
+
|
|
29
|
+
from playwright.async_api import Page, Response
|
|
30
|
+
|
|
31
|
+
from scitex import logging
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
# Cache for config-loaded patterns
|
|
36
|
+
_AUTH_ENDPOINTS: List[str] | None = None
|
|
37
|
+
_ARTICLE_PATTERNS: List[str] | None = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _load_auth_patterns(
    func_name="_load_auth_patterns",
) -> tuple[List[str], List[str]]:
    """Return ``(auth_patterns, article_patterns)``, resolving them at most once.

    The two module-level caches ``_AUTH_ENDPOINTS`` and ``_ARTICLE_PATTERNS``
    are returned directly when both are already populated. Otherwise the
    values are resolved through ``ScholarConfig().resolve(...)`` under the
    keys ``"auth_endpoint_patterns"`` and ``"article_url_patterns"``; any
    value that comes back falsy is replaced by the matching hard-coded
    fallback list, and any exception while loading replaces both.

    Args:
        func_name: Prefix used in the warning messages.

    Returns:
        Tuple of (auth endpoint patterns, article URL patterns).
    """
    global _AUTH_ENDPOINTS, _ARTICLE_PATTERNS

    cache_is_warm = not (_AUTH_ENDPOINTS is None or _ARTICLE_PATTERNS is None)
    if cache_is_warm:
        return _AUTH_ENDPOINTS, _ARTICLE_PATTERNS

    try:
        # Imported lazily, inside the guarded region, so an import failure
        # also falls back to the built-in lists.
        from ...config import ScholarConfig

        scholar_cfg = ScholarConfig()
        _AUTH_ENDPOINTS = scholar_cfg.resolve("auth_endpoint_patterns", None)
        _ARTICLE_PATTERNS = scholar_cfg.resolve("article_url_patterns", None)

        auth_missing = not _AUTH_ENDPOINTS
        if auth_missing:
            logger.warning(
                f"{func_name}: No auth_endpoint_patterns in config, using fallback"
            )
            _AUTH_ENDPOINTS = _get_fallback_auth_patterns()

        article_missing = not _ARTICLE_PATTERNS
        if article_missing:
            logger.warning(
                f"{func_name}: No article_url_patterns in config, using fallback"
            )
            _ARTICLE_PATTERNS = _get_fallback_article_patterns()

    except Exception as exc:
        logger.warning(
            f"{func_name}: Failed to load patterns from config: {str(exc)}, using fallback"
        )
        _AUTH_ENDPOINTS = _get_fallback_auth_patterns()
        _ARTICLE_PATTERNS = _get_fallback_article_patterns()

    return _AUTH_ENDPOINTS, _ARTICLE_PATTERNS
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _get_fallback_auth_patterns(
|
|
78
|
+
func_name="_get_fallback_auth_patterns",
|
|
79
|
+
) -> List[str]:
|
|
80
|
+
"""Fallback auth patterns if config fails."""
|
|
81
|
+
return [
|
|
82
|
+
"go.openathens.net",
|
|
83
|
+
"login.openathens.net",
|
|
84
|
+
"auth.elsevier.com",
|
|
85
|
+
"login.elsevier.com",
|
|
86
|
+
"idp.nature.com",
|
|
87
|
+
"secure.jbs.elsevierhealth.com",
|
|
88
|
+
"shibboleth",
|
|
89
|
+
"saml",
|
|
90
|
+
"/ShibAuth/",
|
|
91
|
+
"/authenticate",
|
|
92
|
+
"/login",
|
|
93
|
+
"/signin",
|
|
94
|
+
"/sso/",
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _get_fallback_article_patterns() -> List[str]:
|
|
99
|
+
"""Fallback article patterns if config fails."""
|
|
100
|
+
return [
|
|
101
|
+
"/science/article/",
|
|
102
|
+
"/articles/",
|
|
103
|
+
"/content/",
|
|
104
|
+
"/full/",
|
|
105
|
+
"/fulltext/",
|
|
106
|
+
"/doi/full/",
|
|
107
|
+
"/doi/abs/",
|
|
108
|
+
"/doi/pdf/",
|
|
109
|
+
".pdf",
|
|
110
|
+
]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def is_auth_endpoint(url: str) -> bool:
    """Check if URL is likely an authentication/intermediate endpoint.

    Patterns come from ``_load_auth_patterns()``. A pattern that starts with
    ``/`` is matched as a substring of the URL path; every other pattern is
    matched as a substring of the hostname. Matching is case-insensitive:
    both the URL and each pattern are lower-cased before comparison (the
    URL was always lower-cased, so a mixed-case pattern such as
    ``/ShibAuth/`` could previously never match).

    Args:
        url: Absolute URL to classify.

    Returns:
        True when any pattern matches, False otherwise. Also False when the
        URL cannot be parsed.
    """
    auth_patterns, _ = _load_auth_patterns()

    try:
        parsed = urlparse(url.lower())
        hostname = parsed.hostname or ""
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. a broken IPv6
        # literal). This predicate is called from event callbacks, so treat
        # an unparseable URL as "not an auth endpoint" instead of raising.
        return False
    path = parsed.path

    for raw_pattern in auth_patterns:
        # An empty pattern is a substring of everything and would flag every
        # URL as an auth endpoint; skip it.
        if not raw_pattern:
            continue
        pattern = raw_pattern.lower()
        haystack = path if pattern.startswith("/") else hostname
        if pattern in haystack:
            return True

    return False
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def is_final_article_url(url: str) -> bool:
    """Check if URL looks like a final article page.

    Browser-internal URLs (``chrome-extension://``, ``about:``, ``data:``)
    are rejected first. Otherwise the URL is an article URL when any of the
    article patterns from ``_load_auth_patterns()`` occurs in it as a
    substring. All comparisons are case-insensitive: the URL and each
    pattern are lower-cased, so a configured pattern containing capitals
    still matches.

    Args:
        url: URL to classify.

    Returns:
        True when an article pattern matches, False otherwise.
    """
    url_lower = url.lower()

    # Exclude chrome extensions and other non-article URLs
    if url_lower.startswith(("chrome-extension://", "about:", "data:")):
        return False

    _, article_patterns = _load_auth_patterns()

    # Empty patterns are skipped: "" is a substring of every URL and would
    # classify everything as an article.
    return any(
        bool(pattern) and pattern.lower() in url_lower
        for pattern in article_patterns
    )
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def is_captcha_page(url: str) -> bool:
    """Tell whether a URL contains one of the known CAPTCHA/challenge markers.

    The check is a case-insensitive substring search over the whole URL.

    Args:
        url: URL to inspect.

    Returns:
        True if any marker occurs in the lower-cased URL, else False.
    """
    markers = (
        # Cloudflare challenge runtime token / challenge token / challenge page
        "__cf_chl_rt_tk=",
        "__cf_chl_tk=",
        "/cdn-cgi/challenge-platform/",
        # Generic markers
        "captcha",
        "challenge",
        "cf_clearance",
    )

    lowered = url.lower()
    for marker in markers:
        if marker in lowered:
            return True
    return False
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
async def detect_captcha_on_page(
    page: Page, func_name="detect_captcha_on_page"
) -> bool:
    """Report whether the page currently appears to show a CAPTCHA challenge.

    Three signals are tried in order and the first hit wins:

    1. the page URL, via ``is_captcha_page``;
    2. a fixed list of CSS selectors, each awaited with
       ``page.wait_for_selector(..., timeout=500)``;
    3. the words "challenge" or "captcha" in the page title.

    A failure of an individual selector probe or of the title lookup is
    ignored. Any other exception is logged at debug level and reported as
    "no CAPTCHA".

    Args:
        page: Playwright page to inspect.
        func_name: Prefix used in the debug log message.

    Returns:
        True if any signal matched, otherwise False.
    """
    challenge_selectors = (
        "#challenge-form",
        ".cf-challenge",
        "[data-ray]",  # Cloudflare Ray ID
        "iframe[src*='captcha']",
        "iframe[src*='recaptcha']",
        "#cf-wrapper",
    )

    try:
        # Signal 1: URL
        if is_captcha_page(page.url):
            return True

        # Signal 2: challenge elements in the DOM
        for css in challenge_selectors:
            try:
                handle = await page.wait_for_selector(css, timeout=500)
            except Exception:
                # Probe failed (e.g. nothing matched within the timeout);
                # move on to the next selector.
                continue
            if handle:
                return True

        # Signal 3: page title
        try:
            lowered_title = (await page.title()).lower()
            if "challenge" in lowered_title or "captcha" in lowered_title:
                return True
        except Exception:
            pass

        return False

    except Exception as exc:
        logger.debug(f"{func_name}: CAPTCHA detection error: {str(exc)}")
        return False
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
async def wait_redirects(
    page: Page,
    timeout: int = 15_000,
    max_redirects: int = 30,
    show_progress: bool = True,
    track_chain: bool = True,
    wait_for_idle: bool = True,
    auth_aware: bool = True,  # New parameter
    func_name="wait_redirects",
) -> Dict:
    """
    Wait for redirect chain to complete, handling authentication endpoints.

    Two mechanisms run concurrently until one of them signals completion or
    ``timeout`` elapses:

    * a ``"response"`` listener on ``page`` that records main-frame
      responses, counts HTTP 3xx redirects and recognises article pages;
    * a polling task that watches the page URL, ``document.readyState``, a
      coarse DOM fingerprint and CAPTCHA indicators.

    Args:
        page: Playwright page object
        timeout: Maximum wait time in milliseconds
        max_redirects: Maximum number of redirects to follow
        show_progress: Show popup messages during redirects
        track_chain: Whether to track detailed redirect chain
        wait_for_idle: Whether to wait for network idle after redirects
        auth_aware: Continue waiting after auth endpoints (default: True)
        func_name: Prefix used in log and popup messages

    Returns:
        dict with redirect information. On the normal path the keys are
        ``success``, ``final_url``, ``redirect_count``, ``total_time_ms``,
        ``timed_out``, ``found_article``, ``stopped_at_auth`` and, when
        ``track_chain`` is true, ``redirect_chain``. If an exception occurs
        while waiting, the dict holds ``success`` (False), ``final_url``,
        ``redirect_count``, ``total_time_ms``, ``timed_out`` (False) and
        ``error``.

        ``redirect_count`` is the number of main-frame HTTP 3xx responses;
        it does not depend on ``show_progress``.
    """
    if show_progress:
        from scitex.browser import browser_logger

    loop = asyncio.get_running_loop()
    start_time = loop.time()
    start_url = page.url

    if show_progress:
        await browser_logger.info(
            page,
            f"{func_name}: Waiting for redirects (max {timeout/1000:.0f}s)...",
            duration_ms=timeout,
        )

    # State shared between the nested callbacks below.
    redirect_chain = [] if track_chain else None
    redirect_count = 0  # HTTP 3xx responses only
    progress_step = 0  # numbering for progress popups (redirects + auth hops)
    navigation_complete = asyncio.Event()
    timed_out = False
    last_response_time = start_time
    found_article = False

    # The event loop keeps only weak references to tasks, so hold strong
    # references to fire-and-forget tasks while they are pending.
    background_tasks = set()

    def _spawn(coro):
        """Schedule ``coro`` as a background task and keep a reference to it."""
        task = asyncio.create_task(coro)
        background_tasks.add(task)
        task.add_done_callback(background_tasks.discard)
        return task

    async def _delayed_complete():
        """Set navigation complete after a short delay to catch final redirects."""
        await asyncio.sleep(2)  # Reduced from 5 to 2 seconds
        if not navigation_complete.is_set():
            navigation_complete.set()

    async def show_countdown():
        """Show countdown timer in popup with ASCII progress bar."""
        if not show_progress:
            return

        timeout_sec = timeout / 1000
        countdown_start = loop.time()
        bar_length = 20

        while not navigation_complete.is_set():
            # Update every 2 seconds. Values are computed after the sleep so
            # the popup reflects the time at which it is shown.
            await asyncio.sleep(2)

            elapsed = loop.time() - countdown_start
            remaining = max(0, timeout_sec - elapsed)
            if remaining <= 0 or navigation_complete.is_set():
                break

            filled = int(elapsed / timeout_sec * bar_length)
            bar = "█" * filled + "░" * (bar_length - filled)

            await browser_logger.debug(
                page,
                f"{func_name}: [{bar}] {remaining:.0f}s",
                duration_ms=2500,
                take_screenshot=False,
            )

    def track_response(response: Response):
        """Record one response and update redirect/article/completion state."""
        nonlocal redirect_count, progress_step, last_response_time, found_article

        # Only track main frame responses
        if response.frame != page.main_frame:
            return

        status = response.status
        url = response.url
        timestamp = loop.time()
        last_response_time = timestamp

        is_redirect = 300 <= status < 400
        is_success = 200 <= status < 300
        is_auth = is_auth_endpoint(url)

        # Track chain if requested
        if track_chain:
            redirect_chain.append(
                {
                    "step": len(redirect_chain) + 1,
                    "url": url,
                    "status": status,
                    "is_redirect": is_redirect,
                    "is_auth": is_auth,
                    "timestamp": timestamp,
                    "time_from_start_ms": (timestamp - start_time) * 1000,
                }
            )

        logger.debug(f"{func_name}: Response: {url[:80]} ({status})")

        # Count each redirect exactly once, independent of show_progress.
        if is_redirect:
            redirect_count += 1

        # Show progress if enabled
        if show_progress and (is_redirect or is_auth):
            progress_step += 1
            _spawn(
                browser_logger.debug(
                    page,
                    f"{func_name}: {'Auth' if is_auth else 'Redirect'} {progress_step}: {url[:40]}...",
                    duration_ms=1000,
                )
            )

        # Check if we reached final article
        if is_success and is_final_article_url(url):
            found_article = True
            logger.debug(f"{func_name}: Found article page: {url[:80]}")
            if show_progress:
                _spawn(
                    browser_logger.debug(
                        page,
                        f"{func_name}: Article found and still waiting: {url[:40]}...",
                        duration_ms=2000,
                    )
                )
            # Don't set complete immediately - wait a bit for any final redirects
            _spawn(_delayed_complete())

        # Handle different response types
        if is_redirect:
            if redirect_count >= max_redirects:
                logger.warning(
                    f"{func_name}: Max redirects ({max_redirects}) reached"
                )
                navigation_complete.set()

        elif is_success:
            # For auth endpoints, continue waiting
            if auth_aware and is_auth:
                logger.debug(
                    f"{func_name}: Auth endpoint reached, continuing to wait: {url[:80]}"
                )
                if show_progress:
                    _spawn(
                        browser_logger.info(
                            page,
                            f"{func_name}: Processing authentication...",
                            duration_ms=2000,
                        )
                    )
                # Don't complete yet - auth endpoints often do client-side redirects
            elif not auth_aware or found_article:
                # Non-auth endpoint or article found - likely complete
                _spawn(_delayed_complete())

        elif status >= 400:
            logger.debug(f"{func_name}: Error response: {status} for {url}")
            navigation_complete.set()

    async def check_url_stability():
        """Monitor URL changes, network activity, and page state for robust completion.

        ``stable_count`` and ``dom_stable_count`` count consecutive polling
        iterations, not seconds; the duration of one iteration depends on
        how long the awaited page calls take.
        """
        # Without this declaration the assignment below would create a local
        # variable, leaving the outer flag untouched and making every read of
        # ``found_article`` in this function raise UnboundLocalError.
        nonlocal found_article

        stable_count = 0
        last_checked_url = page.url
        last_dom_state = None
        dom_stable_count = 0
        captcha_detected = False
        captcha_wait_start = None
        captcha_gave_up = False

        await asyncio.sleep(1)

        while not navigation_complete.is_set():
            try:
                current_url = page.url
                current_time = loop.time()

                # Calculate time since last network activity
                time_since_activity = current_time - last_response_time

                # Check for CAPTCHA (not again once the solver wait timed
                # out, otherwise the 60 s wait would restart every time)
                if not captcha_detected and not captcha_gave_up:
                    captcha_detected = await detect_captcha_on_page(page)
                    if captcha_detected:
                        captcha_wait_start = current_time
                        logger.warning(
                            f"{func_name}: CAPTCHA detected on page: {current_url[:80]}"
                        )
                        if show_progress:
                            _spawn(
                                browser_logger.info(
                                    page,
                                    f"{func_name}: CAPTCHA detected - waiting for solver extension...",
                                    duration_ms=5000,
                                )
                            )

                # Check page load state. ``except Exception`` (not a bare
                # ``except``) so task cancellation is not swallowed here.
                try:
                    load_state = await page.evaluate(
                        "() => document.readyState"
                    )
                    page_loaded = load_state == "complete"
                except Exception:
                    page_loaded = False

                # Check DOM stability (body exists and has content)
                try:
                    dom_state = await page.evaluate(
                        """
                        () => {
                            const body = document.body;
                            if (!body) return 'no-body';
                            const links = document.querySelectorAll('a').length;
                            const scripts = document.querySelectorAll('script').length;
                            return `${body.childElementCount}-${links}-${scripts}`;
                        }
                        """
                    )
                    dom_changed = dom_state != last_dom_state
                    if not dom_changed and last_dom_state:
                        dom_stable_count += 1
                    else:
                        dom_stable_count = 0
                    last_dom_state = dom_state
                except Exception:
                    dom_stable_count = 0

                # Check if URL changed
                if current_url != last_checked_url:
                    logger.debug(
                        f"{func_name}: URL changed: {current_url[:80]}"
                    )
                    last_checked_url = current_url
                    stable_count = 0
                    dom_stable_count = 0

                    # Check if we reached an article
                    if is_final_article_url(current_url):
                        found_article = True
                        logger.info(
                            f"{func_name}: Article URL detected: {current_url[:80]}"
                        )
                        await asyncio.sleep(1)  # Short wait for final resources
                        navigation_complete.set()
                        break
                else:
                    stable_count += 1

                # ROBUST COMPLETION LOGIC:
                # Combine multiple signals: URL stability, network inactivity, page loaded, DOM stable

                # CAPTCHA path: Wait much longer for CAPTCHA solver extension
                if captcha_detected:
                    captcha_wait_time = (
                        current_time - captcha_wait_start
                        if captcha_wait_start is not None
                        else 0
                    )

                    # Check if CAPTCHA is still present
                    still_captcha = await detect_captcha_on_page(page)

                    if not still_captcha:
                        # CAPTCHA solved! Wait a bit for redirect
                        logger.info(
                            f"{func_name}: CAPTCHA appears to be solved, waiting for redirect..."
                        )
                        if show_progress:
                            _spawn(
                                browser_logger.info(
                                    page,
                                    f"{func_name}: CAPTCHA solved! Waiting for redirect...",
                                    duration_ms=2000,
                                )
                            )
                        await asyncio.sleep(3)  # Wait for redirect after CAPTCHA
                        captcha_detected = False
                        captcha_wait_start = None
                        stable_count = 0
                        dom_stable_count = 0
                        continue

                    # Give CAPTCHA solver up to 60 seconds
                    if captcha_wait_time < 60:
                        if (
                            int(captcha_wait_time) % 10 == 0
                            and captcha_wait_time > 0
                        ):
                            logger.info(
                                f"{func_name}: CAPTCHA solver working... ({int(60 - captcha_wait_time)}s remaining)"
                            )
                        # Pace the wait: without a pause this branch polls the
                        # page in a tight loop and repeats the message above.
                        await asyncio.sleep(1)
                        continue

                    logger.warning(
                        f"{func_name}: CAPTCHA solver timeout (60s) - continuing anyway"
                    )
                    if show_progress:
                        _spawn(
                            browser_logger.info(
                                page,
                                f"{func_name}: CAPTCHA solver timeout - manual intervention may be needed",
                                duration_ms=3000,
                            )
                        )
                    captcha_detected = False  # Stop waiting for CAPTCHA
                    captcha_gave_up = True

                # The three paths are evaluated independently. As a single
                # if/elif chain the auth-page and hard-cap paths were
                # unreachable behind the broader "medium" condition.
                waiting_on_auth = (
                    is_auth_endpoint(current_url) and not found_article
                )

                if not waiting_on_auth:
                    # Fast path: Everything stable and page loaded
                    if (
                        stable_count >= 2
                        and time_since_activity >= 2
                        and page_loaded
                        and dom_stable_count >= 2
                    ):
                        logger.debug(
                            f"{func_name}: Complete: URL+network+DOM stable (2s), page loaded"
                        )
                        navigation_complete.set()
                        break

                    # Medium path: URL and network stable for longer
                    if stable_count >= 3 and time_since_activity >= 3:
                        logger.debug(
                            f"{func_name}: Complete: URL+network stable (3s)"
                        )
                        navigation_complete.set()
                        break

                # Auth page path: Wait longer for delayed redirects, and
                # make sure DOM is stable too
                elif (
                    stable_count >= 5
                    and time_since_activity >= 5
                    and dom_stable_count >= 3
                ):
                    logger.debug(
                        f"{func_name}: Complete: Auth page stable (5s) with stable DOM"
                    )
                    navigation_complete.set()
                    break

                # Timeout path: Absolute max wait (extended for potential CAPTCHA)
                if stable_count >= 30:
                    logger.warning(
                        f"{func_name}: Complete: Timeout (30s) - URL: {current_url[:80]}"
                    )
                    navigation_complete.set()
                    break

            except Exception as e:
                logger.debug(f"{func_name}: Error in stability check: {e}")
                # On error, if we've waited a reasonable time, complete
                if stable_count >= 5:
                    logger.warning(
                        f"{func_name}: Complete: Error after 5s - {str(e)[:50]}"
                    )
                    navigation_complete.set()
                    break
                # Avoid spinning if the same error repeats immediately.
                await asyncio.sleep(1)

    # Set up response tracking
    page.on("response", track_response)

    # Start URL stability checker and countdown timer
    stability_task = asyncio.create_task(check_url_stability())
    countdown_task = asyncio.create_task(show_countdown())

    try:
        # Wait for navigation to complete
        try:
            await asyncio.wait_for(
                navigation_complete.wait(), timeout=timeout / 1000
            )
        except asyncio.TimeoutError:
            timed_out = True
            logger.warning(
                f"{func_name}: Redirect wait timeout after {timeout}ms"
            )
            if show_progress:
                await browser_logger.info(
                    page,
                    f"{func_name}: Redirect timeout, finalizing...",
                    duration_ms=1500,
                )

        # Cancel stability checker and countdown
        stability_task.cancel()
        countdown_task.cancel()

        # Wait for network idle if requested
        if wait_for_idle and not timed_out:
            try:
                idle_timeout = min(5000, timeout // 4)
                await page.wait_for_load_state(
                    "networkidle", timeout=idle_timeout
                )
            except Exception:
                # Best effort only; cancellation of this coroutine is a
                # BaseException and still propagates.
                logger.debug(f"{func_name}: Network idle wait failed")

        # Calculate results
        end_time = loop.time()
        total_time_ms = (end_time - start_time) * 1000
        final_url = page.url

        # Determine success
        success = (
            not timed_out
            and (final_url != start_url or redirect_count > 0)
            and (not is_auth_endpoint(final_url) or found_article)
        )

        result = {
            "success": success,
            "final_url": final_url,
            "redirect_count": redirect_count,
            "total_time_ms": round(total_time_ms, 2),
            "timed_out": timed_out,
            "found_article": found_article,
            "stopped_at_auth": is_auth_endpoint(final_url)
            and not found_article,
        }

        if track_chain:
            result["redirect_chain"] = redirect_chain

        # Log results
        if success:
            logger.info(
                f"{func_name}: Redirects complete: {start_url[:40]} -> {final_url[:40]} "
                f"({redirect_count} redirects, {total_time_ms:.0f}ms)"
            )
        elif result.get("stopped_at_auth"):
            logger.warning(
                f"{func_name}: Stopped at auth endpoint: {final_url[:80]} "
                f"(after {redirect_count} redirects, {total_time_ms:.0f}ms)"
            )
        elif timed_out:
            logger.warning(
                f"{func_name}: Redirect wait timed out after {total_time_ms:.0f}ms"
            )
        else:
            logger.debug(f"{func_name}: No redirects detected")

        return result

    except Exception as e:
        logger.error(f"{func_name}: Wait redirects failed: {e}")
        end_time = loop.time()
        return {
            "success": False,
            "final_url": page.url,
            "redirect_count": redirect_count,
            "total_time_ms": round((end_time - start_time) * 1000, 2),
            "timed_out": False,
            "error": str(e),
        }
    finally:
        # Clean up. Cancel the helper tasks first so that a failure while
        # detaching the listener cannot leave them running.
        stability_task.cancel()
        countdown_task.cancel()
        try:
            page.remove_listener("response", track_response)
        except Exception as cleanup_error:
            logger.debug(
                f"{func_name}: Failed to remove response listener: {cleanup_error}"
            )
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
# INFO: BrowserLogger - OpenURLResolver: Navigating to resolver for 10.1016/j.clinph.2024.09.017...
|
|
708
|
+
# INFO: BrowserLogger - Screenshot: 20251011_042805_692-INFO-OpenURLResolver__Navigating_to_resolver_for_10.1016_j.clinph.2024.09.017....png
|
|
709
|
+
# WARN: OpenURL attempt 1 failed: Page.goto: Page crashed
|
|
710
|
+
# Call log:
|
|
711
|
+
# - navigating to "https://unimelb.hosted.exlibrisgroup.com/sfxlcl41?doi=10.1016/j.clinph.2024.09.017", waiting until "domcontentloaded"
|
|
712
|
+
# , retrying in 2s
|
|
713
|
+
# INFO: BrowserLogger - OpenURLResolver: ✗ Attempt 1 failed, retrying in 2s...
|
|
714
|
+
# INFO: BrowserLogger - OpenURLResolver: Navigating to resolver for 10.1016/j.clinph.2024.09.017...
|
|
715
|
+
# WARN: OpenURL attempt 2 failed: Page.goto: Page crashed
|
|
716
|
+
# Call log:
|
|
717
|
+
# - navigating to "https://unimelb.hosted.exlibrisgroup.com/sfxlcl41?doi=10.1016/j.clinph.2024.09.017", waiting until "domcontentloaded"
|
|
718
|
+
# , retrying in 4s
|
|
719
|
+
# INFO: BrowserLogger - OpenURLResolver: ✗ Attempt 2 failed, retrying in 4s...
|
|
720
|
+
# INFO: BrowserLogger - OpenURLResolver: Navigating to resolver for 10.1016/j.clinph.2024.09.017...
|
|
721
|
+
# ERRO: OpenURL resolution failed after 3 attempts: Page.goto: Page crashed
|
|
722
|
+
# Call log:
|
|
723
|
+
# - navigating to "https://unimelb.hosted.exlibrisgroup.com/sfxlcl41?doi=10.1016/j.clinph.2024.09.017", waiting until "domcontentloaded"
|
|
724
|
+
|
|
725
|
+
# INFO: BrowserLogger - OpenURLResolver: ✗ FAILED after 3 attempts: Page.goto: Page crashed
|
|
726
|
+
# Call log:
|
|
727
|
+
# - navigating to "https://unimelb.hosted.exli
|
|
728
|
+
# INFO: BrowserLogger - OpenURLResolver: f10.1016/j.clinph.2024.09.017 - query not resolved
|
|
729
|
+
# WARN: AuthenticationGateway: OpenURL resolution failed
|
|
730
|
+
# INFO: BrowserLogger - AuthenticationGateway: ✗ Could not resolve to publisher URL
|
|
731
|
+
|
|
732
|
+
# EOF
|