scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,705 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-11 07:53:46 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/browser/ScholarBrowserManager.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/browser/ScholarBrowserManager.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
|
|
14
|
+
__FILE__ = __file__
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import subprocess
|
|
18
|
+
import time
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Union
|
|
22
|
+
|
|
23
|
+
from playwright.async_api import Browser, BrowserContext, async_playwright
|
|
24
|
+
|
|
25
|
+
from scitex import logging
|
|
26
|
+
from scitex.browser.automation import CookieAutoAcceptor
|
|
27
|
+
from scitex.browser.core import BrowserMixin, ChromeProfileManager
|
|
28
|
+
from scitex.browser.stealth import StealthManager
|
|
29
|
+
from scitex.scholar.browser.utils.close_unwanted_pages import (
|
|
30
|
+
close_unwanted_pages,
|
|
31
|
+
)
|
|
32
|
+
from scitex.scholar.config import ScholarConfig
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
"""
|
|
37
|
+
Browser Manager with persistent context support.
|
|
38
|
+
|
|
39
|
+
_persistent_context is a **persistent browser context** that stays alive across multiple operations.
|
|
40
|
+
|
|
41
|
+
## Regular vs Persistent Context
|
|
42
|
+
|
|
43
|
+
**Regular context** (new each time):
|
|
44
|
+
```python
|
|
45
|
+
browser = await playwright.chromium.launch()
|
|
46
|
+
context = await browser.new_context() # New context each time
|
|
47
|
+
page = await context.new_page()
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Persistent context** (reused):
|
|
51
|
+
```python
|
|
52
|
+
# Created once in _launch_persistent_context_async()
|
|
53
|
+
self._persistent_context = await self._persistent_playwright.chromium.launch_persistent_context(
|
|
54
|
+
user_data_dir=str(profile_dir), # Persistent profile
|
|
55
|
+
headless=False,
|
|
56
|
+
args=[...extensions...]
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# Reused multiple times
|
|
60
|
+
if hasattr(self, "_persistent_context") and self._persistent_context:
|
|
61
|
+
context = self._persistent_context # Same context
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Benefits of Persistent Context
|
|
65
|
+
|
|
66
|
+
1. **Extensions persist** - Extensions loaded once, available for all pages
|
|
67
|
+
2. **Authentication cookies persist** - No need to re-login
|
|
68
|
+
3. **Profile data persists** - Bookmarks, history, settings maintained
|
|
69
|
+
4. **Performance** - Faster page creation (no browser restart)
|
|
70
|
+
5. **Session continuity** - Maintains login state across operations
|
|
71
|
+
|
|
72
|
+
## In Your Code
|
|
73
|
+
|
|
74
|
+
`_persistent_context` is set in `_launch_persistent_context_async()` and reused in `get_authenticated_browser_and_context_async()`. This allows multiple pages to share the same authenticated, extension-enabled browser session.
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ScholarBrowserManager(BrowserMixin):
|
|
79
|
+
"""Manages a local browser instance with stealth enhancements and invisible mode."""
|
|
80
|
+
|
|
81
|
+
def __init__(
    self,
    browser_mode=None,
    auth_manager=None,
    chrome_profile_name=None,
    use_zenrows_proxy=False,
    config: ScholarConfig = None,
):
    """
    Initialize ScholarBrowserManager with invisible browser capabilities.

    Args:
        browser_mode: "interactive" or "stealth". The value is resolved
            through ``config.resolve`` with "interactive" as the default.
        auth_manager: Authentication manager instance. When None, a
            failure is logged and authenticated contexts cannot be created.
        chrome_profile_name: Profile name passed to ChromeProfileManager.
        use_zenrows_proxy: When True, a ZenRowsProxyManager is created.
        config: Scholar configuration instance; a default ScholarConfig is
            created when omitted.

    Raises:
        ValueError: If the resolved browser mode is neither "interactive"
            nor "stealth".
    """
    self.name = self.__class__.__name__
    # Keep the resolved config so components like ChromeProfileManager share it.
    self.config = config or ScholarConfig()

    # Browser
    self.browser_mode = self.config.resolve(
        "browser_mode", browser_mode, default="interactive"
    )
    super().__init__(mode=self.browser_mode)
    # Pass the *resolved* mode: the raw argument defaults to None, which
    # _set_interactive_or_stealth rejects with ValueError.
    self._set_interactive_or_stealth(self.browser_mode)

    # ZenRows
    self.use_zenrows_proxy = use_zenrows_proxy
    if use_zenrows_proxy:
        from .remote.ZenRowsProxyManager import ZenRowsProxyManager

        self.zenrows_proxy_manager = ZenRowsProxyManager(config=config)

    # Library Authentication
    self.auth_manager = auth_manager
    if auth_manager is None:
        logger.fail(
            f"{self.name}: auth_manager not passed. University Authentication will not be enabled."
        )

    # Chrome Extension
    self.chrome_profile_manager = ChromeProfileManager(
        chrome_profile_name, config=self.config
    )

    # Stealth (viewport_size / spoof_dimension were set by
    # _set_interactive_or_stealth above)
    self.stealth_manager = StealthManager(
        self.viewport_size, self.spoof_dimension
    )

    # Cookie
    self.cookie_acceptor = CookieAutoAcceptor()

    # Initialize persistent browser attributes
    self._persistent_browser = None
    self._persistent_context = None
    self._persistent_playwright = None
|
|
138
|
+
|
|
139
|
+
def _set_interactive_or_stealth(self, browser_mode):
|
|
140
|
+
# Interactive or Stealth
|
|
141
|
+
if browser_mode == "interactive":
|
|
142
|
+
self.headless = False
|
|
143
|
+
self.spoof_dimension = False
|
|
144
|
+
self.viewport_size = (1920, 1080)
|
|
145
|
+
self.display = 0
|
|
146
|
+
elif browser_mode == "stealth":
|
|
147
|
+
# Must be False for dimension spoofing to work
|
|
148
|
+
self.headless = False
|
|
149
|
+
self.spoof_dimension = True
|
|
150
|
+
# This only affects internal viewport, not window size
|
|
151
|
+
# self.viewport_size = (1, 1)
|
|
152
|
+
self.viewport_size = (1920, 1080)
|
|
153
|
+
self.display = 99
|
|
154
|
+
else:
|
|
155
|
+
raise ValueError(
|
|
156
|
+
"browser_mode must be eighther of 'interactive' or 'stealth'"
|
|
157
|
+
)
|
|
158
|
+
logger.debug(f"{self.name}: Browser initialized:")
|
|
159
|
+
logger.debug(f"{self.name}: headless: {self.headless}")
|
|
160
|
+
logger.debug(f"{self.name}: spoof_dimension: {self.spoof_dimension}")
|
|
161
|
+
logger.debug(f"{self.name}: viewport_size: {self.viewport_size}")
|
|
162
|
+
|
|
163
|
+
async def get_authenticated_browser_and_context_async(
    self, **context_options
) -> tuple[Browser, BrowserContext]:
    """Return the browser together with an authenticated context.

    The persistent context is returned when one is set. Otherwise a regular
    context is created from ``context_options`` updated with the auth
    manager's options.

    Args:
        **context_options: Options for the fallback regular context.

    Returns:
        A ``(browser, context)`` pair.

    Raises:
        ValueError: If no auth manager was supplied at construction.
    """
    if self.auth_manager is None:
        raise ValueError(
            f"{self.name}: "
            "Authentication manager is not set. "
            "To use this method, please initialize ScholarBrowserManager with an auth_manager."
        )

    await self.auth_manager.ensure_authenticate_async()

    browser = (
        await self._get_persistent_browser_with_profile_but_not_with_auth_async()
    )

    persistent = getattr(self, "_persistent_context", None)
    if persistent:
        logger.info(
            f"{self.name}: Using persistent context with profile and extensions"
        )
        return browser, persistent

    logger.warning(f"{self.name}: Falling back to regular context creation")
    context_options.update(await self.auth_manager.get_auth_options())
    fallback = await self._new_context_async(browser, **context_options)
    return browser, fallback
|
|
196
|
+
|
|
197
|
+
async def _new_context_async(
|
|
198
|
+
self, browser: Browser, **context_options
|
|
199
|
+
) -> BrowserContext:
|
|
200
|
+
"""Creates a new browser context with stealth options and invisible mode applied."""
|
|
201
|
+
stealth_options = self.stealth_manager.get_stealth_options()
|
|
202
|
+
context = await browser.new_context(
|
|
203
|
+
{**stealth_options, **context_options}
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
# Apply stealth script
|
|
207
|
+
await context.add_init_script(self.stealth_manager.get_init_script())
|
|
208
|
+
await context.add_init_script(
|
|
209
|
+
self.stealth_manager.get_dimension_spoofing_script()
|
|
210
|
+
)
|
|
211
|
+
await context.add_init_script(
|
|
212
|
+
self.cookie_acceptor.get_auto_acceptor_script()
|
|
213
|
+
)
|
|
214
|
+
return context
|
|
215
|
+
|
|
216
|
+
# ########################################
|
|
217
|
+
# Persistent Context
|
|
218
|
+
# ########################################
|
|
219
|
+
async def _get_persistent_browser_with_profile_but_not_with_auth_async(
|
|
220
|
+
self,
|
|
221
|
+
) -> Browser:
|
|
222
|
+
if (
|
|
223
|
+
self._persistent_browser is None
|
|
224
|
+
or self._persistent_browser.is_connected() is False
|
|
225
|
+
):
|
|
226
|
+
await self.auth_manager.ensure_authenticate_async()
|
|
227
|
+
await self._ensure_playwright_started_async()
|
|
228
|
+
await self._ensure_extensions_installed_async()
|
|
229
|
+
self._verify_xvfb_running()
|
|
230
|
+
await self._launch_persistent_context_async()
|
|
231
|
+
return self._persistent_browser
|
|
232
|
+
|
|
233
|
+
async def _ensure_playwright_started_async(self):
|
|
234
|
+
if self._persistent_playwright is None:
|
|
235
|
+
self._persistent_playwright = await async_playwright().start()
|
|
236
|
+
|
|
237
|
+
async def _ensure_extensions_installed_async(self):
|
|
238
|
+
if not self.chrome_profile_manager.check_extensions_installed():
|
|
239
|
+
logger.error(f"{self.name}: Chrome extensions not verified")
|
|
240
|
+
try:
|
|
241
|
+
logger.warning(f"{self.name}: Trying install extensions")
|
|
242
|
+
await self.chrome_profile_manager.install_extensions_manually_if_not_installed_async()
|
|
243
|
+
except Exception as e:
|
|
244
|
+
logger.error(f"{self.name}: Installation failed: {str(e)}")
|
|
245
|
+
|
|
246
|
+
async def _launch_persistent_context_async(self):
    """Launch the persistent Chromium context for the configured profile.

    Steps, in order: remove leftover Chrome singleton lock files from the
    profile directory, ``pkill`` Chrome processes using that profile, launch
    the persistent context (always headed), close unwanted pages once and
    start a background task that keeps closing them, register the stealth
    init scripts, and load authentication cookies. The context is stored on
    ``_persistent_context`` and its ``browser`` on ``_persistent_browser``.
    """
    persistent_context_launch_options = (
        self._build_persistent_context_launch_options()
    )

    # Clean up any existing singleton lock files that might prevent browser launch
    profile_dir = self.chrome_profile_manager.profile_dir

    removed_locks = 0
    for lock_name in (
        "SingletonLock",
        "SingletonSocket",
        "SingletonCookie",
        "lockfile",
    ):
        lock_file = profile_dir / lock_name
        try:
            # unlink() directly rather than testing exists() first:
            # exists() follows symlinks and is False for a dangling link, so
            # a lock stored as a dangling symlink would never be removed.
            # NOTE(review): presumably Chrome's Singleton* entries are such
            # symlinks on Linux - confirm on the target platform.
            lock_file.unlink()
        except FileNotFoundError:
            continue
        except Exception as e:
            logger.warning(
                f"{self.name}: Could not remove {lock_file.name}: {e}"
            )
            continue
        logger.debug(
            f"{self.name}: Removed Chrome lock file: {lock_file.name}"
        )
        removed_locks += 1

    if removed_locks > 0:
        logger.debug(
            f"{self.name}: Cleaned up {removed_locks} Chrome lock files"
        )
        # Wait a moment for the system to release file handles. Use
        # asyncio.sleep so the event loop is not blocked.
        await asyncio.sleep(1)

    # Kill any lingering Chrome processes using this profile
    try:
        profile_path_str = str(profile_dir)
        result = subprocess.run(
            ["pkill", "-f", f"user-data-dir={profile_path_str}"],
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            logger.debug(
                f"{self.name}: Killed lingering Chrome processes for this profile"
            )
            # Give processes time to fully terminate
            await asyncio.sleep(2)
    except Exception as e:
        logger.debug(f"{self.name}: Chrome process cleanup attempt: {e}")

    # Always launch headed; this shows a small screen with the 4 extensions shown
    persistent_context_launch_options["headless"] = False
    self._persistent_context = await self._persistent_playwright.chromium.launch_persistent_context(
        **persistent_context_launch_options
    )
    # First cleanup run (immediate, non-continuous)
    await close_unwanted_pages(
        self._persistent_context, delay_sec=1, continuous=False
    )
    # Background continuous monitoring task. Keep a reference: the event
    # loop only holds weak references to tasks, so an unreferenced task can
    # be garbage-collected before it finishes.
    self._unwanted_pages_monitor_task = asyncio.create_task(
        close_unwanted_pages(
            self._persistent_context, delay_sec=5, continuous=True
        )
    )
    await self._apply_stealth_scripts_to_persistent_context_async()
    await self._load_auth_cookies_to_persistent_context_async()
    self._persistent_browser = self._persistent_context.browser
|
|
317
|
+
|
|
318
|
+
def _verify_xvfb_running(self):
    """Verify the X display ``:{self.display}`` is reachable, starting Xvfb if not.

    The display is probed with ``xdpyinfo``. When the probe fails, any Xvfb
    matching this display is killed, a new Xvfb is started, and the display
    is probed one more time.

    Returns:
        True when the display is reachable; False when it is still
        unreachable after the start attempt or when any command raises.
    """
    display_arg = f":{self.display}"

    def display_reachable():
        probe = subprocess.run(
            ["xdpyinfo", "-display", display_arg],
            capture_output=True,
            text=True,
            timeout=5,
        )
        return probe.returncode == 0

    try:
        if display_reachable():
            logger.debug(
                f"{self.name}: Xvfb display :{self.display} is running"
            )
            return True

        logger.debug(f"{self.name}: Starting Xvfb display :{self.display}")
        # Kill any existing Xvfb on this display first
        subprocess.run(
            ["pkill", "-f", f"Xvfb.*:{self.display}"],
            capture_output=True,
        )
        time.sleep(0.5)

        # Keep the handle so the child process object is not dropped while
        # the server is still running.
        self._xvfb_process = subprocess.Popen(
            [
                "Xvfb",
                display_arg,
                "-screen",
                "0",
                "1920x1080x24",  # 24-bit color depth for better rendering
                "-ac",  # Disable access control
                "+extension",
                "GLX",  # OpenGL support
                "+extension",
                "RANDR",  # Screen resize support
                "+render",  # Render extension for better graphics
                "-noreset",  # Don't reset after last client exits
                "-dpi",
                "96",  # Standard DPI
            ],
            env={**os.environ, "DISPLAY": display_arg},
        )
        time.sleep(3)  # Give Xvfb more time to initialize properly

        # Probe once more instead of recursing: a display that never comes
        # up must not restart Xvfb over and over.
        if display_reachable():
            logger.debug(
                f"{self.name}: Xvfb display :{self.display} is running"
            )
            return True

        logger.error(
            f"{self.name}: Xvfb display :{self.display} is not reachable after start attempt"
        )
        return False
    except Exception as e:
        logger.error(f"{self.name}: Cannot verify Xvfb: {e}")
        return False
|
|
367
|
+
|
|
368
|
+
def _build_persistent_context_launch_options(self):
|
|
369
|
+
stealth_args = self.stealth_manager.get_stealth_options_additional()
|
|
370
|
+
extension_args = self.chrome_profile_manager.get_extension_args()
|
|
371
|
+
pdf_download_args = [
|
|
372
|
+
"--always-open-pdf-externally",
|
|
373
|
+
"--disable-plugins-discovery",
|
|
374
|
+
"--plugin-policy=block",
|
|
375
|
+
]
|
|
376
|
+
|
|
377
|
+
stealth_args.extend(
|
|
378
|
+
[
|
|
379
|
+
f"--display=:{self.display}",
|
|
380
|
+
"--window-size=1920,1080",
|
|
381
|
+
]
|
|
382
|
+
)
|
|
383
|
+
|
|
384
|
+
no_welcome_args = [
|
|
385
|
+
"--disable-extensions-file-access-check",
|
|
386
|
+
"--disable-extensions-http-throttling",
|
|
387
|
+
"--disable-component-extensions-with-background-pages",
|
|
388
|
+
]
|
|
389
|
+
|
|
390
|
+
# Disable "Restore pages?" popup and session restore dialogs
|
|
391
|
+
no_restore_args = [
|
|
392
|
+
"--disable-session-crashed-bubble",
|
|
393
|
+
"--disable-infobars",
|
|
394
|
+
"--no-first-run",
|
|
395
|
+
"--no-default-browser-check",
|
|
396
|
+
]
|
|
397
|
+
|
|
398
|
+
screenshot_args = [
|
|
399
|
+
"--no-sandbox",
|
|
400
|
+
"--disable-blink-features=AutomationControlled",
|
|
401
|
+
"--disable-features=VizDisplayCompositor",
|
|
402
|
+
"--disable-web-security",
|
|
403
|
+
"--disable-features=TranslateUI",
|
|
404
|
+
"--disable-ipc-flooding-protection",
|
|
405
|
+
"--font-render-hinting=none",
|
|
406
|
+
"--disable-font-subpixel-positioning",
|
|
407
|
+
"--disable-remote-fonts",
|
|
408
|
+
"--disable-background-timer-throttling",
|
|
409
|
+
"--disable-backgrounding-occluded-windows",
|
|
410
|
+
"--disable-renderer-backgrounding",
|
|
411
|
+
"--disable-font-loading-api",
|
|
412
|
+
]
|
|
413
|
+
|
|
414
|
+
launch_args = (
|
|
415
|
+
extension_args
|
|
416
|
+
+ stealth_args
|
|
417
|
+
+ no_welcome_args
|
|
418
|
+
+ no_restore_args
|
|
419
|
+
+ pdf_download_args
|
|
420
|
+
+ screenshot_args
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
# Debug: Show window args for stealth mode
|
|
424
|
+
if self.spoof_dimension:
|
|
425
|
+
window_args = [arg for arg in launch_args if "window-" in arg]
|
|
426
|
+
logger.debug(f"{self.name}: Stealth window args: {window_args}")
|
|
427
|
+
|
|
428
|
+
proxy_config = None
|
|
429
|
+
if self.use_zenrows_proxy:
|
|
430
|
+
proxy_config = self.zenrows_proxy_manager.get_proxy_config()
|
|
431
|
+
|
|
432
|
+
# Set download directory to scholar library downloads folder
|
|
433
|
+
downloads_path = self.config.get_library_downloads_dir()
|
|
434
|
+
|
|
435
|
+
return {
|
|
436
|
+
"user_data_dir": str(self.chrome_profile_manager.profile_dir),
|
|
437
|
+
"headless": self.headless,
|
|
438
|
+
"args": launch_args,
|
|
439
|
+
"accept_downloads": True, # Enable download handling
|
|
440
|
+
"downloads_path": str(
|
|
441
|
+
downloads_path
|
|
442
|
+
), # Set custom download directory
|
|
443
|
+
"proxy": proxy_config,
|
|
444
|
+
"viewport": {
|
|
445
|
+
"width": self.viewport_size[0],
|
|
446
|
+
"height": self.viewport_size[1],
|
|
447
|
+
},
|
|
448
|
+
"screen": {
|
|
449
|
+
"width": self.viewport_size[0],
|
|
450
|
+
"height": self.viewport_size[1],
|
|
451
|
+
},
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
async def _apply_stealth_scripts_to_persistent_context_async(self):
|
|
455
|
+
await self._persistent_context.add_init_script(
|
|
456
|
+
self.stealth_manager.get_init_script()
|
|
457
|
+
)
|
|
458
|
+
await self._persistent_context.add_init_script(
|
|
459
|
+
self.stealth_manager.get_dimension_spoofing_script()
|
|
460
|
+
)
|
|
461
|
+
await self._persistent_context.add_init_script(
|
|
462
|
+
self.cookie_acceptor.get_auto_acceptor_script()
|
|
463
|
+
)
|
|
464
|
+
|
|
465
|
+
async def _load_auth_cookies_to_persistent_context_async(self):
    """Copy the auth manager's cookies into the persistent context.

    Nothing is loaded when there is no auth manager, when it reports not
    authenticated (checked with ``verify_live=False``), or when it returns
    no cookies. Any exception is logged as a warning and swallowed.
    """
    manager = self.auth_manager
    if not manager:
        logger.debug(
            f"{self.name}: No auth_manager available, skipping cookie loading"
        )
        return

    try:
        # Check if we have authentication
        authenticated = await manager.is_authenticate_async(verify_live=False)
        if not authenticated:
            logger.debug(
                f"{self.name}: Not authenticated, skipping cookie loading"
            )
            return

        cookies = await manager.get_auth_cookies_async()
        if not cookies:
            logger.debug(
                f"{self.name}: No cookies available from auth manager"
            )
            return

        await self._persistent_context.add_cookies(cookies)
        logger.info(
            f"{self.name}: Loaded {len(cookies)} authentication cookies into persistent browser context"
        )
    except Exception as e:
        logger.warning(
            f"{self.name}: Failed to load authentication cookies: {e}"
        )
|
|
496
|
+
|
|
497
|
+
async def take_screenshot_async(
    self,
    page,
    path: Union[str, Path],
    timeout_sec: float = 30.0,
    timeout_after_sec: float = 30.0,
    full_page: bool = False,
):
    """Save a screenshot of ``page`` to ``path`` without viewport changes.

    Failures are logged and not raised.

    Args:
        page: Page object whose ``screenshot`` coroutine is awaited.
        path: Destination file path.
        timeout_sec: Screenshot timeout in seconds (passed on in ms).
        timeout_after_sec: Not used by this method.
        full_page: Forwarded to ``page.screenshot``.
    """
    try:
        timeout_ms = timeout_sec * 1000
        await page.screenshot(
            path=path, full_page=full_page, timeout=timeout_ms
        )
        logger.info(f"{self.name}: Saved: {path}")
    except Exception as e:
        logger.fail(f"{self.name}: Screenshot failed for {path}: {e}")
|
|
513
|
+
|
|
514
|
+
async def start_periodic_screenshots_async(
    self,
    page,
    output_dir: Union[str, Path],
    prefix: str = "periodic",
    interval_seconds: int = 1,
    duration_seconds: int = 10,
    verbose: bool = False,
):
    """
    Start taking periodic screenshots in the background.

    Args:
        page: The page to screenshot
        output_dir: Directory in which the screenshot files are written
        prefix: Prefix for screenshot filenames
        interval_seconds: Seconds between screenshots
        duration_seconds: Total duration to take screenshots (0 = infinite)
        verbose: Whether to log each screenshot (and each failure)

    Returns:
        asyncio.Task that can be cancelled to stop screenshots
    """

    async def capture_loop():
        seconds_elapsed = 0
        shot_number = 0
        unlimited = duration_seconds == 0

        while unlimited or seconds_elapsed < duration_seconds:
            shot_number += 1
            # Drop the last three digits of %f to keep milliseconds
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            timestamp = stamp[:-3]
            filename = f"{prefix}_step{shot_number:03d}_{timestamp}-{self.browser_mode}.png"
            path = os.path.join(str(output_dir), filename)

            try:
                await page.screenshot(path=path)
                if verbose:
                    logger.debug(
                        f"{self.name}: Screenshot {shot_number}: {path}"
                    )
                elif shot_number == 1:
                    logger.debug(
                        f"{self.name}: Started periodic screenshots: {prefix}_*"
                    )
            except Exception as e:
                # Failures are only reported in verbose mode
                if verbose:
                    logger.debug(
                        f"{self.name}: Screenshot {shot_number} failed: {e}"
                    )

            await asyncio.sleep(interval_seconds)
            seconds_elapsed += interval_seconds

        logger.debug(
            f"{self.name}: Completed {shot_number} periodic screenshots for {prefix}"
        )

    # Run the loop as a background task and hand it to the caller
    return asyncio.create_task(capture_loop())
|
|
575
|
+
|
|
576
|
+
async def stop_periodic_screenshots_async(self, task: asyncio.Task):
    """Cancel a periodic-screenshot task and wait for it to finish.

    Does nothing when ``task`` is falsy or already done.
    """
    if not task or task.done():
        return

    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        logger.debug(f"{self.name}: Periodic screenshots stopped")
|
|
584
|
+
|
|
585
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Async context-manager exit: delegate to the parent class's ``__aexit__``.

    The parent's return value is not propagated, so this method always
    returns None.
    """
    await super().__aexit__(exc_type, exc_val, exc_tb)
|
|
587
|
+
|
|
588
|
+
async def close(self):
    """Close browser while preserving authentication and extension data.

    Closes the persistent context and browser (unless the context's browser
    reports it is already disconnected), then stops Playwright. Errors are
    logged as warnings and not raised. The three ``_persistent_*``
    references are always reset to None; ``auth_manager`` and
    ``chrome_profile_manager`` are kept.
    """
    context = self._persistent_context
    browser = self._persistent_browser
    playwright = self._persistent_playwright
    try:
        try:
            # The context's ``browser`` attribute may be None, so guard it
            # before calling is_connected(); otherwise the AttributeError
            # would skip closing the context.
            owner = context.browser if context else None
            if owner is not None and not owner.is_connected():
                logger.debug(f"{self.name}: Browser already closed")
            else:
                if context:
                    await context.close()
                    logger.debug(
                        f"{self.name}: Closed persistent browser context"
                    )

                if browser and browser.is_connected():
                    await browser.close()
                    logger.debug(f"{self.name}: Closed persistent browser")
        except Exception as e:
            logger.warning(f"{self.name}: Error during browser cleanup: {e}")

        # Stop Playwright in its own step: the reference is dropped below,
        # so skipping this after an early exit or a failed close would
        # leave the instance running with nothing left to stop it.
        try:
            if playwright:
                await playwright.stop()
                logger.debug(f"{self.name}: Stopped Playwright instance")
        except Exception as e:
            logger.warning(f"{self.name}: Error during browser cleanup: {e}")
    finally:
        # Reset references but keep auth_manager and chrome_profile_manager
        self._persistent_context = None
        self._persistent_browser = None
        self._persistent_playwright = None
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
if __name__ == "__main__":

    async def main(browser_mode="interactive"):
        """Example usage of ScholarBrowserManager with stealth features.

        Opens an authenticated context, visits each configured test site,
        saves a screenshot per site, and always closes the browser manager
        at the end.

        Args:
            browser_mode: "interactive" or "stealth".
        """
        from scitex.scholar import ScholarAuthManager, ScholarBrowserManager

        browser_manager = ScholarBrowserManager(
            chrome_profile_name="system",
            browser_mode=browser_mode,
            auth_manager=ScholarAuthManager(),
        )

        try:
            _browser, context = (
                await browser_manager.get_authenticated_browser_and_context_async()
            )
            page = await context.new_page()

            # Test sites configuration
            test_sites = [
                # {
                #     "name": "Extensions Test",
                #     "url": "",
                #     "screenshot_spath": "/tmp/openathens_test.png",
                # },
                # {
                #     "name": "SSO Test",
                #     "url": "https://sso.unimelb.edu.au/",
                #     "screenshot_spath": "/tmp/unimelb_sso_test.png",
                # },
                # {
                #     "name": "OpenAthens",
                #     "url": "https://my.openathens.net/account",
                #     "screenshot_spath": "/tmp/openathens_test.png",
                # },
                # {
                #     "name": "CAPTCHA Test",
                #     "url": "https://www.google.com/recaptcha/api2/demo",
                #     "screenshot_spath": "/tmp/captcha_test.png",
                # },
                {
                    "name": "Nature Test",
                    "url": "https://www.nature.com/articles/s41593-025-01990-7",
                    "screenshot_spath": "/tmp/nature_test.png",
                },
                # {
                #     "name": "Google Test",
                #     "url": "https://www.google.com",
                #     "screenshot_spath": "/tmp/google_test.png",
                # },
            ]

            # Run tests for each site; one failing site does not stop the rest
            for site in test_sites:
                try:
                    await page.goto(
                        site["url"],
                        wait_until="domcontentloaded",
                        timeout=30000,
                    )

                    await browser_manager.take_screenshot_async(
                        page, site["screenshot_spath"]
                    )
                except Exception as e:
                    logger.fail(f"Failed to process {site['name']}: {e}")
                    continue
        finally:
            # Release the context, browser and Playwright even when
            # authentication or page creation fails.
            await browser_manager.close()

    import argparse

    parser = argparse.ArgumentParser(
        description="ScholarBrowserManager testing"
    )
    parser.add_argument(
        "--stealth",
        action="store_true",
        help="Use stealth mode (default: interactive)",
    )
    args = parser.parse_args()

    browser_mode = "stealth" if args.stealth else "interactive"
    asyncio.run(main(browser_mode=browser_mode))
|
|
701
|
+
|
|
702
|
+
# python -m scitex.scholar.browser.ScholarBrowserManager --stealth
|
|
703
|
+
# python -m scitex.scholar.browser.ScholarBrowserManager
|
|
704
|
+
|
|
705
|
+
# EOF
|