scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-07-31 22:08:31 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/browser/remote/_ZenRowsRemoteScholarBrowserManager.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
Browser manager specifically for the ZenRows Scraping Browser service.
|
|
14
|
+
This provides cloud-based Chrome instances with built-in anti-bot bypass.
|
|
15
|
+
"""
|
|
16
|
+
from typing import Any, Optional, Dict
|
|
17
|
+
|
|
18
|
+
from playwright.async_api import Browser, BrowserContext, async_playwright, Page
|
|
19
|
+
|
|
20
|
+
from scitex import logging
|
|
21
|
+
from scitex.scholar.browser.local.utils._CookieAutoAcceptor import CookieAutoAcceptor
|
|
22
|
+
from ._ZenRowsAPIBrowser import ZenRowsAPIBrowser
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ZenRowsRemoteScholarBrowserManager:
    """Manage a connection to the remote ZenRows Scraping Browser service.

    Two access paths are kept side by side:

    * a Playwright CDP connection to ``wss://browser.zenrows.com``, opened
      lazily by :meth:`get_browser_async`; and
    * a ``ZenRowsAPIBrowser`` helper, created in ``__init__``, that the
      screenshot / extraction methods delegate to.

    Instances are async context managers; leaving the ``async with`` block
    calls :meth:`close`.
    """

    # Environment variables consulted when the matching argument is None.
    _API_KEY_ENV = "SCITEX_SCHOLAR_ZENROWS_API_KEY"
    _PROXY_COUNTRY_ENV = "SCITEX_SCHOLAR_ZENROWS_PROXY_COUNTRY"

    # Tried in order, case-insensitively; the first pattern that matches
    # anywhere in the HTML wins and its group 1 is reported as the PDF URL.
    _PDF_URL_PATTERNS = (
        r'href="([^"]+\.pdf[^"]*)"',
        r'content="([^"]+\.pdf[^"]*)"',
        r'data-pdf-url="([^"]+)"',
        r'pdfUrl["\']?\s*:\s*["\']([^"\']+)',
    )

    def __init__(
        self,
        auth_manager=None,
        zenrows_api_key: Optional[str] = None,
        proxy_country: Optional[str] = None,
        **kwargs,
    ):
        """Initialize the ZenRows browser manager.

        Args:
            auth_manager: Authentication manager used for cookie injection.
            zenrows_api_key: ZenRows API key. When ``None`` the value of
                ``SCITEX_SCHOLAR_ZENROWS_API_KEY`` is used.
            proxy_country: Country code for proxy routing (e.g. 'au', 'us').
                When ``None`` the value of
                ``SCITEX_SCHOLAR_ZENROWS_PROXY_COUNTRY`` is used.
                Note: country routing may only work with certain endpoints.
            **kwargs: Additional arguments (ignored, for compatibility).

        Raises:
            ValueError: If no API key is passed and the env var is unset/empty.
        """
        # The environment is read here, at call time. Reading it in the
        # parameter defaults would freeze the values at import time and ignore
        # variables exported (or loaded from a .env file) afterwards.
        if zenrows_api_key is None:
            zenrows_api_key = os.getenv(self._API_KEY_ENV)
        if proxy_country is None:
            proxy_country = os.getenv(self._PROXY_COUNTRY_ENV)

        self.auth_manager = auth_manager
        self.zenrows_api_key = zenrows_api_key
        self.proxy_country = proxy_country
        if not self.zenrows_api_key:
            raise ValueError(
                "ZenRows API key required. Set SCITEX_SCHOLAR_ZENROWS_API_KEY env var "
                "or pass zenrows_api_key parameter"
            )
        self._playwright = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        self.cookie_acceptor = CookieAutoAcceptor()

        # Also initialize API browser for reliable screenshots.
        # The API browser is given "au" when no country was requested.
        self._api_browser = ZenRowsAPIBrowser(
            api_key=self.zenrows_api_key,
            proxy_country=self.proxy_country or "au",
        )

    @staticmethod
    def _find_pdf_url(html: str) -> Optional[str]:
        """Return the first PDF-looking URL found in *html*, or ``None``."""
        import re

        for pattern in ZenRowsRemoteScholarBrowserManager._PDF_URL_PATTERNS:
            match = re.search(pattern, html, re.IGNORECASE)
            if match:
                return match.group(1)
        return None

    async def get_browser_async(self) -> Browser:
        """Connect to the ZenRows Scraping Browser.

        An already connected browser is reused. Otherwise Playwright is
        started on first use and a CDP connection is opened.

        Returns:
            The connected Playwright ``Browser``.

        Raises:
            Exception: Whatever ``connect_over_cdp`` raises; it is logged and
                re-raised unchanged.
        """
        if self._browser and self._browser.is_connected():
            return self._browser

        logger.debug("Connecting to ZenRows Scraping Browser...")
        if not self._playwright:
            self._playwright = await async_playwright().start()

        # Build connection URL with optional country parameter
        connection_url = f"wss://browser.zenrows.com?apikey={self.zenrows_api_key}"

        # Note: Country routing via WebSocket URL is not documented
        # but we can try appending it as a parameter
        if self.proxy_country:
            connection_url += f"&proxy_country={self.proxy_country}"
            logger.debug(f"Requesting proxy country: {self.proxy_country.upper()}")

        try:
            self._browser = await self._playwright.chromium.connect_over_cdp(
                connection_url
            )
            logger.debug("Successfully connected to ZenRows browser")

            # Log a note about country routing
            if self.proxy_country:
                logger.debug(
                    "Note: Country routing via Scraping Browser is experimental. "
                    "Use API mode for guaranteed country-specific IPs."
                )

            return self._browser
        except Exception as e:
            logger.error(f"Failed to connect to ZenRows browser: {e}")
            raise

    async def get_authenticated_browser_and_context_async(
        self,
    ) -> tuple[Browser, BrowserContext]:
        """Get browser context with authentication cookies pre-loaded.

        The browser's first existing context is reused when there is one,
        otherwise a new context is created. Failures while injecting the
        cookie auto-acceptor or the auth cookies are logged, not raised.

        Returns:
            ``(browser, context)``; the context is also remembered on the
            instance.

        Raises:
            ValueError: If the manager was created without an ``auth_manager``.
        """
        if self.auth_manager is None:
            err_msg = (
                "Authentication manager is not set. "
                "Initialize ScholarBrowserManager with an auth_manager to use this method."
            )
            raise ValueError(err_msg)

        browser = await self.get_browser_async()

        if browser.contexts:
            context = browser.contexts[0]
        else:
            context = await browser.new_context()

        # Inject cookie auto-acceptor (best effort)
        try:
            await self.cookie_acceptor.inject_auto_acceptor_async(context)
            logger.debug("Injected cookie auto-acceptor")
        except Exception as e:
            # NOTE(review): `warn` is a deprecated alias on stdlib loggers;
            # kept because the scitex logger API is not visible from here.
            logger.warn(f"Failed to inject cookie acceptor: {e}")

        if self.auth_manager and await self.auth_manager.is_authenticate_async():
            try:
                cookies = await self.auth_manager.get_auth_cookies_async()
                await context.add_cookies(cookies)
                logger.success(
                    f"Injected {len(cookies)} authentication cookies"
                )
            except Exception as e:
                logger.error(f"Failed to inject auth cookies: {e}")

        self._context = context
        return browser, context

    async def new_page(self, context: Optional[BrowserContext] = None) -> Any:
        """Create a new page in the ZenRows browser.

        Args:
            context: Context to open the page in. When omitted, the
                authenticated context is obtained first (which requires an
                ``auth_manager``).

        Returns:
            The new page, with ``Accept-Language`` / ``Accept-Encoding``
            extra headers set.
        """
        if not context:
            _, context = await self.get_authenticated_browser_and_context_async()

        page = await context.new_page()
        await page.set_extra_http_headers(
            {
                "Accept-Language": "en-US,en;q=0.9",
                "Accept-Encoding": "gzip, deflate, br",
            }
        )
        return page

    async def close(self):
        """Close the ZenRows browser connection and stop Playwright.

        Playwright is stopped and the cached handles are cleared even when
        closing the browser raises; the exception still propagates.
        """
        try:
            if self._browser and self._browser.is_connected():
                await self._browser.close()
                logger.debug("Closed ZenRows browser connection")
        finally:
            try:
                if self._playwright:
                    await self._playwright.stop()
            finally:
                # Always forget the handles so a later get_browser_async()
                # starts from a clean state.
                self._browser = None
                self._context = None
                self._playwright = None

    async def take_screenshot_reliable_async(
        self,
        url: str,
        output_path: str,
        use_api: bool = True,
        wait_ms: int = 5000,
    ) -> Dict[str, Any]:
        """Take a screenshot of *url* via the API browser or the WebSocket browser.

        The path is selected by ``use_api``; there is no automatic fallback
        from one to the other.

        Args:
            url: URL to screenshot
            output_path: Path to save screenshot
            use_api: Use API browser (recommended) vs WebSocket
            wait_ms: Additional wait time in milliseconds

        Returns:
            API path: whatever ``navigate_and_screenshot_async`` returns.
            WebSocket path: ``{"success": True, "screenshot": {...}}`` on
            success, ``{"success": False, "error": <message>}`` on failure.
        """
        if use_api:
            # Use API browser for reliability
            logger.debug("Using ZenRows API for screenshot (recommended)")
            return await self._api_browser.navigate_and_screenshot_async(
                url=url,
                screenshot_path=output_path,
                wait_ms=wait_ms,
            )

        # Use WebSocket browser (less reliable for captchas)
        logger.debug("Using ZenRows WebSocket browser")
        try:
            browser = await self.get_browser_async()
            context = await browser.new_context()
            try:
                page = await context.new_page()

                # Navigate
                await page.goto(url, wait_until="domcontentloaded", timeout=30000)

                # Wait for content
                await page.wait_for_load_state("networkidle", timeout=10000)
                await page.wait_for_timeout(wait_ms)

                # Take screenshot
                await page.screenshot(path=output_path, full_page=True)
            finally:
                # Closing the context also closes its pages, so the remote
                # session is released even when navigation or capture failed.
                await context.close()

            return {
                "success": True,
                "screenshot": {
                    "saved": True,
                    "path": output_path,
                },
            }
        except Exception as e:
            logger.error(f"WebSocket screenshot failed: {e}")
            return {
                "success": False,
                "error": str(e),
            }

    async def navigate_and_extract_async(
        self,
        url: str,
        extract_pdf_url: bool = True,
        take_screenshot: bool = False,
        screenshot_path: Optional[str] = None,
        wait_ms: int = 8000,
    ) -> Dict[str, Any]:
        """Navigate to URL and extract information.

        This combines navigation, screenshot, and data extraction.
        Uses the API approach for better reliability.

        Args:
            url: Target URL
            extract_pdf_url: Request the HTML and try to find a PDF URL in it
            take_screenshot: Whether to capture screenshot
            screenshot_path: Where to save screenshot (only used when
                ``take_screenshot`` is true)
            wait_ms: Wait passed to the API browser; the default is a longer
                wait intended for academic sites.

        Returns:
            The API browser's result dict, with ``"pdf_url"`` added when a
            PDF link was found in the returned HTML.
        """
        result = await self._api_browser.navigate_and_screenshot_async(
            url=url,
            screenshot_path=screenshot_path if take_screenshot else None,
            return_html=extract_pdf_url,
            wait_ms=wait_ms,
        )

        if extract_pdf_url and result.get("html"):
            pdf_url = self._find_pdf_url(result["html"])
            if pdf_url is not None:
                result["pdf_url"] = pdf_url
                logger.debug(f"Found PDF URL: {result['pdf_url']}")

        return result

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        await self.close()
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
if __name__ == "__main__":
    import asyncio

    async def main():
        """Manual smoke test comparing browsers against a set of probe sites.

        Runs the same probes through the local browser (if importable), the
        ZenRows remote browser with and without a country, and the ZenRows
        API client, then prints a summary and writes it next to the
        screenshots as ``test_summary.json``.
        """
        import json
        from datetime import datetime
        from pathlib import Path

        # Create screenshots directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshots_dir = Path(f"./screenshots_remote_{timestamp}")
        screenshots_dir.mkdir(exist_ok=True)

        # Test sites for comprehensive evaluation
        test_sites = [
            ("ip", "https://httpbin.org/ip", "Shows your public IP address"),
            ("headers", "https://httpbin.org/headers", "HTTP headers sent by browser"),
            ("bot_detection", "https://bot.sannysoft.com/", "Bot tests - green=good, red=detected"),
            ("fingerprint", "https://pixelscan.net/", "Browser fingerprinting analysis"),
            ("webrtc", "https://browserleaks.com/webrtc", "WebRTC IP leak test"),
        ]

        async def test_browser_async(browser_type, browser_manager, use_auth=False):
            """Run every probe in ``test_sites`` through *browser_manager*.

            Returns a dict with per-probe statuses under ``"tests"``, the
            detected public IP under ``"ip"`` (when the ip probe ran) and
            ``"error"`` when the browser could not be set up.
            """
            print(f"\n{'='*60}")
            print(f"Testing: {browser_type}")
            print('='*60)

            # Statuses live under their own key: writing them to
            # results[test_name] would overwrite results["ip"] (the detected
            # address) with "Success" for the probe named "ip".
            results = {"tests": {}}
            file_prefix = browser_type.lower().replace(' ', '_')

            try:
                if use_auth and hasattr(browser_manager, 'get_authenticated_browser_and_context_async'):
                    # For managers with auth support: open pages in the
                    # authenticated context.
                    _, page_factory = await browser_manager.get_authenticated_browser_and_context_async()
                else:
                    # Direct browser access
                    page_factory = await browser_manager.get_browser_async()

                for test_name, url, description in test_sites:
                    print(f"\n{test_name}: {description}")

                    page = None
                    try:
                        page = await page_factory.new_page()

                        # Navigate with timeout
                        await page.goto(url, wait_until="domcontentloaded", timeout=30000)

                        if test_name in ("ip", "headers"):
                            # Extract text content; text_content() yields None
                            # when the <pre> element has no text.
                            content = (await page.text_content("pre")) or ""
                            print(f"Result: {content.strip()[:200]}...")

                            # Parse IP if available
                            if test_name == "ip":
                                try:
                                    ip_data = json.loads(content)
                                    results['ip'] = ip_data.get('origin', 'Unknown')
                                    print(f"Detected IP: {results['ip']}")
                                except (ValueError, AttributeError):
                                    # Not JSON, or JSON that is not an object.
                                    results['ip'] = 'Parse error'
                        else:
                            # Wait for dynamic content
                            await page.wait_for_timeout(5000)

                            # For fingerprint test, try to click start button
                            if test_name == "fingerprint":
                                try:
                                    await page.click('button:has-text("Start")', timeout=3000)
                                    await page.wait_for_timeout(5000)
                                except Exception:
                                    # The button is optional; screenshot the
                                    # page as-is. Cancellation still propagates.
                                    pass

                            # Take screenshot
                            screenshot_path = screenshots_dir / f"{file_prefix}_{test_name}.png"
                            await page.screenshot(path=screenshot_path, full_page=True)
                            print(f"Screenshot saved: {screenshot_path}")

                        results["tests"][test_name] = "Success"

                    except Exception as e:
                        print(f"Failed: {str(e)[:100]}...")
                        results["tests"][test_name] = f"Failed: {str(e)[:50]}"
                    finally:
                        if page:
                            await page.close()

            except Exception as e:
                print(f"Browser initialization failed: {str(e)}")
                results['error'] = str(e)
            finally:
                # Clean up even when setup failed, so a half-started
                # Playwright instance is not left behind.
                if hasattr(browser_manager, 'close'):
                    try:
                        await browser_manager.close()
                    except Exception as e:
                        print(f"Cleanup failed: {e}")

            return results

        # Store all results
        all_results = {}

        # Test 1: Regular browser (baseline) - if available
        print("\nChecking if we can import local browser for comparison...")
        try:
            from scitex.scholar.browser import ScholarBrowserManager

            print("Initializing regular browser for baseline comparison...")
            regular_manager = ScholarBrowserManager(headless=False)
            regular_results = await test_browser_async("Regular Browser", regular_manager)
            all_results["Regular Browser"] = regular_results
        except Exception as e:
            print(f"Regular browser not available for comparison: {e}")
            all_results["Regular Browser"] = {"error": "Not available"}

        # Test 2: ZenRows Remote Browser (default settings)
        print("\nInitializing ZenRows Remote Browser...")
        try:
            zenrows_manager = ZenRowsRemoteScholarBrowserManager()
            zenrows_results = await test_browser_async("ZenRows Remote", zenrows_manager)
            all_results["ZenRows Remote"] = zenrows_results
        except Exception as e:
            print(f"ZenRows Remote test failed: {e}")
            all_results["ZenRows Remote"] = {"error": str(e)}

        # Test 3: ZenRows Remote Browser with country (if supported)
        print("\nInitializing ZenRows Remote Browser with AU country...")
        try:
            zenrows_au_manager = ZenRowsRemoteScholarBrowserManager(proxy_country='au')
            zenrows_au_results = await test_browser_async("ZenRows Remote AU", zenrows_au_manager)
            all_results["ZenRows Remote AU"] = zenrows_au_results
        except Exception as e:
            print(f"ZenRows Remote AU test failed: {e}")
            all_results["ZenRows Remote AU"] = {"error": str(e)}

        # Test 4: Test the API client as well
        print("\nTesting ZenRows API Client for comparison...")
        try:
            # Relative import: run this file with `python -m` (see the
            # command at the bottom of the file).
            from ._ZenRowsAPIClient import ZenRowsAPIClient

            print("Testing basic API request...")
            api_client = ZenRowsAPIClient()
            response = api_client.request("https://httpbin.org/ip")
            if response.status_code == 200:
                ip_data = json.loads(response.text)
                print(f"API Client IP (Basic): {ip_data.get('origin', 'Unknown')}")
                print(f"API Cost: {response.headers.get('X-Request-Cost', 'Unknown')} credits")
                all_results["API Client Basic"] = {"ip": ip_data.get('origin', 'Unknown')}

            print("\nTesting API with Australian proxy...")
            api_client_au = ZenRowsAPIClient(default_country='au')
            response_au = api_client_au.request("https://httpbin.org/ip")
            if response_au.status_code == 200:
                ip_data_au = json.loads(response_au.text)
                print(f"API Client IP (AU): {ip_data_au.get('origin', 'Unknown')}")
                print(f"API Cost: {response_au.headers.get('X-Request-Cost', 'Unknown')} credits")
                all_results["API Client AU"] = {"ip": ip_data_au.get('origin', 'Unknown')}
        except Exception as e:
            print(f"API Client test failed: {e}")
            all_results["API Client"] = {"error": str(e)}

        # Print summary
        print("\n" + "="*60)
        print("SUMMARY REPORT")
        print("="*60)

        print("\nIP Addresses detected:")
        for method, data in all_results.items():
            if isinstance(data, dict):
                ip = data.get('ip', 'Not tested')
            else:
                ip = 'Error'
            print(f" {method:.<35} {ip}")

        print(f"\nScreenshots saved in: {screenshots_dir.absolute()}")

        # Save summary report
        summary_path = screenshots_dir / "test_summary.json"
        with open(summary_path, 'w') as f:
            json.dump({
                'timestamp': timestamp,
                'results': all_results,
                'test_sites': [{"name": t[0], "url": t[1], "description": t[2]} for t in test_sites]
            }, f, indent=2)
        print(f"Summary report saved: {summary_path}")

        # Comparison notes
        print("\n" + "="*60)
        print("COMPARISON NOTES:")
        print("="*60)
        print("1. Regular Browser: Uses your local IP, no proxy")
        print("2. ZenRows Remote: Cloud browser with built-in anti-bot")
        print("3. ZenRows Remote AU: Attempts Australian IP (experimental)")
        print("4. API Client Basic: Direct API without country routing")
        print("5. API Client AU: Guaranteed Australian IP via API mode")
        print("\nRecommendation: Use API Client for country-specific needs,")
        print("Remote Browser for complex JavaScript sites.")

    # Run the comparison
    asyncio.run(main())
|
|
567
|
+
|
|
568
|
+
# python -m scitex.scholar.browser.remote._ZenRowsRemoteScholarBrowserManager
|
|
569
|
+
|
|
570
|
+
# EOF
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Remote browser components (ZenRows, CAPTCHA handling)."""
|
|
2
|
+
|
|
3
|
+
from .ZenRowsAPIClient import ZenRowsAPIBrowser
|
|
4
|
+
from .ZenRowsBrowserManager import ZenRowsRemoteScholarBrowserManager
|
|
5
|
+
from .CaptchaHandler import CaptchaHandler
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"ZenRowsAPIBrowser",
|
|
9
|
+
"ZenRowsRemoteScholarBrowserManager",
|
|
10
|
+
"CaptchaHandler",
|
|
11
|
+
]
|