scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-11 01:19:48 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/url/strategies/find_pdf_urls_by_navigation.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/url/strategies/find_pdf_urls_by_navigation.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
Find PDF URLs by navigating to PDF links and following redirects.
|
|
16
|
+
|
|
17
|
+
Handles publishers like ScienceDirect that require navigation
|
|
18
|
+
through redirect chains to reach the actual PDF URL.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import List, Optional
|
|
22
|
+
from urllib.parse import urljoin
|
|
23
|
+
|
|
24
|
+
from playwright.async_api import Page
|
|
25
|
+
|
|
26
|
+
from scitex.browser.debugging import browser_logger
|
|
27
|
+
from scitex.scholar import ScholarConfig
|
|
28
|
+
from scitex.scholar.browser.utils import wait_redirects
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def find_pdf_urls_by_navigation(
    page: Page,
    url: str = None,
    config: ScholarConfig = None,
    func_name: str = "find_pdf_urls_by_navigation",
) -> List[str]:
    """
    Find PDF URLs by navigating to PDF links and capturing final URLs.

    This handles cases like ScienceDirect where:
    1. Direct PDF links exist (/pdfft? endpoints)
    2. Navigating to them triggers redirects
    3. Final destination is the actual PDF on pdf.sciencedirectassets.com

    The strategy only runs when the current page URL contains one of the
    supported publisher domains; otherwise it returns an empty list without
    touching the page.

    Args:
        page: Playwright page object
        url: Current page URL (unused, for signature consistency)
        config: ScholarConfig instance (a default one is created when
            omitted; it is not otherwise consulted here)
        func_name: Function name for logging

    Returns:
        List of PDF URLs found (at most one entry: the URL the temporary
        page ended up on after following the redirect chain). Empty when
        the page is not a supported publisher, no PDF link is present, the
        final URL does not look like a PDF, or any step fails.
    """
    config = config or ScholarConfig()
    pdf_urls = []

    try:
        # Only Elsevier-family sites are handled by this strategy.
        current_url = page.url.lower()
        is_sciencedirect = any(
            domain in current_url
            for domain in [
                "sciencedirect.com",
                "cell.com",
                "elsevier.com",
                "ssrn.com",
            ]
        )

        if not is_sciencedirect:
            return []

        # Look for existing PDF links on the page
        pdf_link_selectors = [
            'a[href*="/pdfft?"]',  # ScienceDirect PDF endpoint
            'a[aria-label*="Download PDF"]',
            'a[aria-label*="Download This Paper"]',
            'a:has-text("View PDF")',
            "a.pdf-link",
            'a[href*="/pdf/"]',
        ]

        pdf_href = None
        for selector in pdf_link_selectors:
            try:
                element = await page.query_selector(selector)
                if element:
                    href = await element.get_attribute("href")
                    if href and ("/pdfft?" in href or "/pdf/" in href):
                        # Resolve against the page URL unconditionally:
                        # urljoin leaves absolute URLs alone and also copes
                        # with relative hrefs that do not start with "/".
                        href = urljoin(page.url, href)
                        pdf_href = href
                        await browser_logger.debug(
                            page,
                            f"{func_name}: Found PDF link: {href[:80]}...",
                        )
                        break
            except Exception as e:
                # A failing selector must not abort the search; try the next.
                await browser_logger.debug(
                    page,
                    f"{func_name}: Error checking selector {selector}: {e}",
                )
                continue

        if not pdf_href:
            await browser_logger.debug(
                page, f"{func_name}: No PDF links found on page"
            )
            return []

        # Navigate to PDF URL in a new page to capture final URL
        context = page.context
        new_page = None

        try:
            await browser_logger.debug(
                page,
                f"{func_name}: Navigating to PDF URL to capture final destination...",
            )
            new_page = await context.new_page()

            # Navigate and wait for redirects - be patient!
            await new_page.goto(pdf_href, wait_until="commit", timeout=60000)

            # Wait for redirects to complete with longer timeout
            redirect_result = await wait_redirects(
                new_page,
                timeout=60000,  # 60 seconds timeout
                show_progress=False,
                track_chain=True,
                auth_aware=True,
                wait_for_idle=True,  # Wait for network idle
            )
            redirect_chain = redirect_result.get("redirect_chain", [])

            # Additional wait to ensure PDF loads
            await new_page.wait_for_timeout(2000)

            # The page's own URL after the extra wait is authoritative.
            final_url = new_page.url

            await browser_logger.debug(
                page,
                f"{func_name}: Redirect complete after {len(redirect_chain)} steps",
            )
            await browser_logger.debug(
                page, f"{func_name}: Final URL: {final_url[:80]}..."
            )

            # Check if final URL is a PDF
            if any(
                indicator in final_url
                for indicator in [
                    "pdf.sciencedirectassets.com",
                    ".pdf",
                    "application/pdf",
                    "/pdf/",
                    "pdfft?",
                ]
            ):
                pdf_urls.append(final_url)
                await browser_logger.debug(
                    page, f"{func_name}: Captured final PDF URL via navigation"
                )
            else:
                await browser_logger.debug(
                    page,
                    f"{func_name}: Final URL doesn't appear to be a PDF: {final_url}",
                )

        except Exception as e:
            await browser_logger.error(
                page, f"{func_name}: Error navigating to PDF: {e}"
            )
        finally:
            if new_page:
                try:
                    await new_page.close()
                except Exception:
                    # Best-effort cleanup. Catch Exception rather than using
                    # a bare except so task cancellation and interpreter
                    # shutdown signals are not swallowed.
                    pass

        return pdf_urls

    except Exception as e:
        await browser_logger.error(
            page, f"{func_name}: Error finding PDFs via navigation: {e}"
        )
        return []
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
async def find_pdf_url_from_sciencedirect_api(
    page: Page, func_name="find_pdf_url_from_sciencedirect_api"
) -> Optional[str]:
    """
    Extract a PDF URL from the page by evaluating JavaScript in its context.

    The script returns the first hit among, in order: an anchor whose href
    contains "/pdfft?", ``window.SDM.pdfUrl``,
    ``window.SD.article.pdfDownloadUrl``, an anchor whose aria-label contains
    "View PDF", and the ``citation_pdf_url`` meta tag.

    Args:
        page: Playwright page object to evaluate the script in
        func_name: Function name for logging

    Returns:
        The value found by the script, or None when nothing is found or the
        evaluation/logging raises.
    """
    lookup_script = """
        () => {
            // Look for PDF URL in various places

            // Check for download URL in page data
            const pdfLinks = document.querySelectorAll('a[href*="/pdfft?"]');
            if (pdfLinks.length > 0) {
                return pdfLinks[0].href;
            }

            // Check window.SDM object (ScienceDirect)
            if (window.SDM && window.SDM.pdfUrl) {
                return window.SDM.pdfUrl;
            }

            // Check for entitlement info
            if (window.SD && window.SD.article && window.SD.article.pdfDownloadUrl) {
                return window.SD.article.pdfDownloadUrl;
            }

            // Look for View PDF button
            const viewPdfBtn = document.querySelector('a[aria-label*="View PDF"]');
            if (viewPdfBtn && viewPdfBtn.href) {
                return viewPdfBtn.href;
            }

            // Check meta tags
            const pdfMeta = document.querySelector('meta[name="citation_pdf_url"]');
            if (pdfMeta && pdfMeta.content) {
                return pdfMeta.content;
            }

            return null;
        }
        """

    try:
        found = await page.evaluate(lookup_script)
        if not found:
            return None

        # Logging stays inside the try so that a failure while formatting
        # or emitting the message is reported and yields None.
        await browser_logger.debug(
            page,
            f"{func_name}: Found PDF info from page context: {found[:80]}...",
        )
        return found
    except Exception as err:
        await browser_logger.debug(
            page, f"Could not extract PDF info from page context: {err}"
        )
        return None
|
|
255
|
+
|
|
256
|
+
# EOF
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-10 01:33:13 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/url/strategies/publisher_patterns.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/url/strategies/publisher_patterns.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
|
|
14
|
+
__FILE__ = __file__
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
from typing import List
|
|
18
|
+
|
|
19
|
+
from scitex.browser.debugging import browser_logger
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _strip_query_and_fragment(url: str) -> str:
    """Return *url* without its query string and fragment."""
    return url.split("#", 1)[0].split("?", 1)[0]


def _publisher_pattern_candidates(url: str) -> List[str]:
    """Build candidate PDF URLs for *url* from publisher URL conventions.

    Pure string manipulation; performs no I/O. Duplicates are removed while
    preserving the order in which candidates were generated.
    """
    candidates = []

    # Nature
    if "nature.com" in url and not url.endswith(".pdf"):
        candidates.append(url.rstrip("/") + ".pdf")

    # Science
    elif "science.org" in url and "/doi/10." in url and "/pdf/" not in url:
        candidates.append(url.replace("/doi/", "/doi/pdf/"))

    # Elsevier/ScienceDirect
    elif "sciencedirect.com" in url and "/pii/" in url:
        pii = url.split("/pii/")[-1].split("/")[0].split("?")[0]
        candidates.append(
            f"https://www.sciencedirect.com/science/article/pii/{pii}/pdfft"
        )

    # Wiley
    elif "wiley.com" in url and "/doi/" in url and "/pdfdirect" not in url:
        candidates.append(url.replace("/doi/", "/doi/pdfdirect/"))

    # Frontiers
    elif "frontiersin.org" in url and "/full" in url:
        candidates.append(url.replace("/full", "/pdf"))

    # Springer ("link.springer.com" also contains "springer.com")
    elif "springer.com" in url and "/article/" in url:
        if not url.endswith(".pdf"):
            candidates.append(url.rstrip("/") + ".pdf")

    # IEEE
    elif "ieee.org" in url and "/document/" in url:
        doc_id = url.split("/document/")[-1].split("/")[0]
        candidates.append(
            f"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber={doc_id}"
        )

    # IOP Publishing
    elif "iopscience.iop.org" in url and "/article/" in url:
        # Pattern: /article/10.1088/1741-2552/aaf92e
        # PDF URL: /article/10.1088/1741-2552/aaf92e/pdf
        if not url.endswith("/pdf"):
            candidates.append(url.rstrip("/") + "/pdf")
        # Capture the whole DOI path: IOP DOIs contain further "/" after
        # the registrant prefix, so the match must not stop at the first
        # slash. Query string and fragment are excluded.
        doi_match = re.search(r"/article/(10\.\d+/[^?#]+)", url)
        if doi_match:
            doi = doi_match.group(1).rstrip("/")
            # Drop a trailing view segment so it is not doubled below.
            for view_suffix in ("/pdf", "/meta"):
                if doi.endswith(view_suffix):
                    doi = doi[: -len(view_suffix)]
                    break
            # Alternative PDF locations for IOP
            candidates.append(f"https://iopscience.iop.org/article/{doi}/pdf")
            candidates.append(
                f"https://iopscience.iop.org/article/{doi}/pdf/metrics"
            )

    # MDPI
    elif "mdpi.com" in url and "/htm" in url:
        candidates.append(url.replace("/htm", "/pdf"))

    # BMC
    elif "biomedcentral.com" in url and "/articles/" in url:
        candidates.append(url.replace("/articles/", "/track/pdf/"))

    # PLOS
    elif "plos.org" in url and "/article" in url:
        if "?id=" in url:
            article_id = url.split("?id=")[-1].split("&")[0]
            base_url = url.split("/article")[0]
            candidates.append(
                f"{base_url}/article/file?id={article_id}&type=printable"
            )
        elif "/article/" in url:
            # Strip any existing query BEFORE inserting "?id=", otherwise
            # the freshly inserted parameter is cut off again.
            candidates.append(
                _strip_query_and_fragment(url).replace(
                    "/article/", "/article/file?id="
                )
                + "&type=printable"
            )

    # The checks below form a second, independent chain (as in the
    # original code), so they are evaluated even if a pattern above matched.

    # Journal of Neuroscience
    if "jneurosci.org" in url and "/content/" in url:
        # Extract volume/issue/page numbers
        match = re.search(r"/content/(\d+)/(\d+)/(\d+)", url)
        if match:
            vol, issue, first_page = match.groups()
            candidates.append(
                f"https://www.jneurosci.org/content/jneuro/{vol}/{issue}/{first_page}.full.pdf"
            )

    # eNeuro
    elif "eneuro.org" in url and "/content/" in url:
        # Pattern: /content/3/6/ENEURO.0334-16.2016
        match = re.search(r"/content/[^/]+/[^/]+/(ENEURO\.[^/]+)", url)
        if match:
            eneuro_id = match.group(1)
            candidates.append(
                f"https://www.eneuro.org/content/eneuro/early/recent/{eneuro_id}.full.pdf"
            )

    # Oxford Academic
    elif "academic.oup.com" in url:
        # Without an "/article/" segment the replacement is a no-op and the
        # HTML page URL itself would be reported as a PDF candidate.
        if "/article/" in url:
            candidates.append(url.replace("/article/", "/article-pdf/"))

    # Preprint servers
    elif "biorxiv.org" in url or "medrxiv.org" in url:
        # Handle versioned URLs
        if "/v" in url:  # e.g., /v1, /v2
            base_url = url.split("/v")[0]
            candidates.append(f"{base_url}.full.pdf")
        else:
            candidates.append(url + ".full.pdf")

    elif "arxiv.org" in url:
        if "/abs/" in url:
            # Query strings / fragments are not part of the arXiv identifier.
            arxiv_id = _strip_query_and_fragment(url.split("/abs/")[-1])
            candidates.append(f"https://arxiv.org/pdf/{arxiv_id}.pdf")

    # Remove duplicates while preserving order
    return list(dict.fromkeys(candidates))


async def find_pdf_urls_by_publisher_patterns(
    page,
    url: str = None,
    config=None,
    func_name: str = "find_pdf_urls_by_publisher_patterns"
) -> List[str]:
    """
    Generate PDF URLs based on publisher-specific URL patterns.

    No network access is performed; candidates are derived from the URL
    string alone and are not verified to exist.

    Args:
        page: Playwright page object (used for logging, and as the source
            of the URL when ``url`` is not provided)
        url: Page URL to analyze (defaults to page.url if not provided)
        config: ScholarConfig instance (unused, for signature consistency)
        func_name: Function name for logging

    Returns:
        List of unique PDF URLs generated from patterns, in generation
        order. Empty when no publisher pattern applies.
    """
    url = url or page.url
    urls_pdf = _publisher_pattern_candidates(url)

    if urls_pdf:
        # `page` is never rebound in this function, so the logger always
        # receives the real page object.
        await browser_logger.debug(
            page,
            f"{func_name}: Pattern matching found {len(urls_pdf)} URLs"
        )

    return urls_pdf
|
|
164
|
+
|
|
165
|
+
# EOF
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-13 06:32:08 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
|
|
14
|
+
__FILE__ = __file__
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
Find PDF URLs using Python Zotero translators.
|
|
18
|
+
|
|
19
|
+
This module uses the zotero-translators-python package instead of running
|
|
20
|
+
JavaScript translators. It provides better performance, reliability, and
|
|
21
|
+
maintainability compared to the JavaScript-based approach.
|
|
22
|
+
|
|
23
|
+
Features:
|
|
24
|
+
- 100+ Python translators available
|
|
25
|
+
- No JavaScript execution overhead
|
|
26
|
+
- Better error handling
|
|
27
|
+
- Easier debugging
|
|
28
|
+
- Type safety
|
|
29
|
+
|
|
30
|
+
Usage:
|
|
31
|
+
from scitex.scholar.url_finder.strategies import find_pdf_urls_by_zotero_translators
|
|
32
|
+
|
|
33
|
+
pdf_urls = await find_pdf_urls_by_zotero_translators(page, url)
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
from typing import List
|
|
37
|
+
|
|
38
|
+
from playwright.async_api import Page
|
|
39
|
+
from zotero_translators_python.core.registry import TranslatorRegistry
|
|
40
|
+
|
|
41
|
+
from scitex import logging
|
|
42
|
+
from scitex.browser import browser_logger
|
|
43
|
+
|
|
44
|
+
logger = logging.getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
async def find_pdf_urls_by_zotero_translators(
    page: Page,
    url: str = None,
    config=None,
    func_name: str = "find_pdf_urls_by_zotero_translators",
) -> List[str]:
    """
    Find PDF URLs using Python-based Zotero translators.

    A translator is looked up in the ``TranslatorRegistry`` for the target
    URL and, if one matches, asked to extract PDF URLs from the page.

    Args:
        page: Playwright page object with loaded content
        url: Current page URL (defaults to page.url if not provided)
        config: ScholarConfig instance (unused, for signature consistency)
        func_name: Function name for logging

    Returns:
        De-duplicated list of PDF URLs in the order the translator
        returned them. Empty list if no translator matches, the translator
        returns nothing, or extraction fails.

    Examples:
        >>> async with async_playwright() as p:
        ...     browser = await p.chromium.launch()
        ...     page = await browser.new_page()
        ...     await page.goto("https://www.nature.com/articles/nature12345")
        ...     pdf_urls = await find_pdf_urls_by_zotero_translators(page)
        ...     print(f"Found {len(pdf_urls)} PDF URLs")
    """
    target_url = url or page.url

    try:
        # Look up the translator responsible for this URL, if any.
        translator = TranslatorRegistry().get_translator_for_url(target_url)

        if not translator:
            await browser_logger.info(
                page,
                f"{func_name}: No Python translator matches URL: {target_url}",
            )
            return []

        collected = []
        try:
            extracted = await translator.extract_pdf_urls_async(page)

            if not extracted:
                await browser_logger.warning(
                    page,
                    f"{func_name}: {translator.LABEL} returned empty list - check if page loaded correctly",
                )
            else:
                await browser_logger.debug(
                    page,
                    f"{func_name}: {translator.LABEL} found {len(extracted)} PDF URL(s)",
                )
                for position, candidate in enumerate(extracted, 1):
                    await browser_logger.debug(
                        page, f"{func_name} {position}. {candidate}"
                    )
                collected.extend(extracted)

        except Exception as err:
            import traceback

            # A translator failure is logged with its traceback; the
            # function then continues and returns whatever was collected.
            await browser_logger.error(
                page,
                f"{func_name}: {translator.LABEL} extraction failed: {err}\nTraceback: {traceback.format_exc()}",
            )

        # dict keys keep first-seen order, so this drops later duplicates.
        deduplicated = list(dict.fromkeys(collected))

        summary = (
            f"{func_name}: ✓ Python Zotero found {len(deduplicated)} URLs"
            if deduplicated
            else f"{func_name}: No PDF URLs found by Python translators"
        )
        await browser_logger.debug(page, summary)

        return deduplicated

    except Exception as err:
        await browser_logger.warning(
            page,
            f"{func_name}: Zotero strategy failed: {err}",
        )
        return []
|
|
162
|
+
|
|
163
|
+
# EOF
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-10-11 01:19:50 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/url/strategies/find_supplementary_urls_by_href.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = (
|
|
9
|
+
"./src/scitex/scholar/url/strategies/find_supplementary_urls_by_href.py"
|
|
10
|
+
)
|
|
11
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
12
|
+
# ----------------------------------------
|
|
13
|
+
|
|
14
|
+
__FILE__ = __file__
|
|
15
|
+
|
|
16
|
+
from typing import Dict, List
|
|
17
|
+
|
|
18
|
+
from playwright.async_api import Page
|
|
19
|
+
|
|
20
|
+
from scitex import logging
|
|
21
|
+
from scitex.browser import browser_logger
|
|
22
|
+
from scitex.scholar import ScholarConfig
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def find_supplementary_urls_by_href(
    page: Page, config: ScholarConfig = None
) -> List[Dict]:
    """Find supplementary material URLs in a web page.

    Anchors matching the configured CSS selectors are collected in the
    page's JavaScript context; each distinct ``href`` is reported once.

    Args:
        page: Playwright page object to search
        config: ScholarConfig instance; its "supplementary_selectors"
            setting supplies the CSS selectors (a default one is created
            when omitted)

    Returns:
        List of dicts with keys ``url``, ``description`` (trimmed link
        text) and ``source`` (always "href_pattern"). Empty list when the
        evaluation fails.
    """
    await browser_logger.debug(page, "Finding Supplementary URLs...")

    config = config or ScholarConfig()
    supplementary_selectors = config.resolve(
        "supplementary_selectors",
        default=[
            'a[href*="supplementary"]',
            'a[href*="supplement"]',
            'a[href*="additional"]',
        ],
    )

    try:
        # A lone string is treated as a single selector rather than being
        # split into characters.
        if isinstance(supplementary_selectors, str):
            selectors = [supplementary_selectors]
        else:
            selectors = list(supplementary_selectors)

        # Pass the selectors as an evaluate() argument instead of splicing
        # their Python repr into the JavaScript source: Playwright
        # serializes the value, so quotes or other special characters in a
        # selector cannot break (or inject into) the script.
        supplementary = await page.evaluate(
            """(selectors) => {
                const results = [];
                const seen_urls = new Set();

                selectors.forEach(selector => {
                    document.querySelectorAll(selector).forEach(link => {
                        if (link.href && !seen_urls.has(link.href)) {
                            seen_urls.add(link.href);
                            results.push({
                                url: link.href,
                                description: link.textContent.trim(),
                                source: 'href_pattern'
                            });
                        }
                    });
                });
                return results;
            }""",
            selectors,
        )
        return supplementary
    except Exception as e:
        logger.error(f"Error finding supplementary URLs: {e}")
        return []
|
|
69
|
+
|
|
70
|
+
# EOF
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Scholar utilities - Organized by function.
|
|
5
|
+
|
|
6
|
+
Structure:
|
|
7
|
+
- text/: Text processing utilities (TextNormalizer)
|
|
8
|
+
- bibtex/: BibTeX parsing utilities
|
|
9
|
+
- cleanup/: Maintenance and cleanup scripts
|
|
10
|
+
|
|
11
|
+
For backward compatibility, TextNormalizer is re-exported at top level.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .text import TextNormalizer
|
|
15
|
+
from .bibtex import parse_bibtex
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"TextNormalizer", # Most commonly used
|
|
19
|
+
"parse_bibtex",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
# EOF
|