scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-22 00:54:37 (ywatanabe)"
|
|
4
|
+
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/ml/classification/cross_validation.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
Cross-validation helper for streamlined machine learning experiments.
|
|
14
|
+
|
|
15
|
+
Provides a high-level interface for running cross-validation with
|
|
16
|
+
automatic metric tracking, validation, and report generation.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import time
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
import pandas as pd
|
|
26
|
+
from sklearn.model_selection import BaseCrossValidator, StratifiedKFold
|
|
27
|
+
|
|
28
|
+
from .reporters import ClassificationReporter
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CrossValidationExperiment:
    """
    Streamlined cross-validation experiment runner.

    This class handles:
    - Cross-validation splitting
    - Model training and evaluation
    - Automatic metric calculation
    - Hyperparameter tracking
    - Progress monitoring
    - Report generation

    Parameters
    ----------
    name : str
        Experiment name
    model_fn : Callable
        Zero-argument function that returns a fresh model instance
    cv : BaseCrossValidator, optional
        Cross-validation splitter (default: 5-fold stratified, shuffled,
        random_state=42)
    output_dir : Union[str, Path], optional
        Output directory for results (forwarded to the reporter)
    metrics : List[str], optional
        List of metrics to calculate (forwarded to the reporter as
        ``required_metrics``)
    save_models : bool
        Whether to save trained models
    verbose : bool
        Whether to print progress
    """

    def __init__(
        self,
        name: str,
        model_fn: Callable,
        cv: Optional[BaseCrossValidator] = None,
        output_dir: Optional[Union[str, Path]] = None,
        metrics: Optional[List[str]] = None,
        save_models: bool = True,
        verbose: bool = True,
    ):
        self.name = name
        self.model_fn = model_fn
        # Compare against None explicitly so a splitter object is never
        # discarded because of its truthiness.
        self.cv = (
            cv
            if cv is not None
            else StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        )
        self.save_models = save_models
        self.verbose = verbose

        # Initialize reporter
        self.reporter = ClassificationReporter(
            output_dir=output_dir, required_metrics=metrics, verbose=verbose
        )

        # Track experiment metadata
        self.metadata = {
            "name": name,
            "start_time": None,
            "end_time": None,
            # May be None here for splitters that need data to count folds;
            # run() fills it in once X and y are available.
            "n_folds": self._count_splits(),
            "hyperparameters": {},
            "dataset_info": {},
        }

        # Results storage
        self.fold_times = []
        self.models = []

    def _count_splits(
        self,
        X: Optional[np.ndarray] = None,
        y: Optional[np.ndarray] = None,
    ) -> Optional[int]:
        """
        Return the number of folds, or None when it cannot be determined.

        Some splitters only know their fold count once they see the data,
        and raise when asked without it. Those errors are swallowed here
        because the fold count is informational only.
        """
        get_n_splits = getattr(self.cv, "get_n_splits", None)
        if get_n_splits is None:
            return None

        # Prefer the data-aware call when data is available; fall back to
        # the argument-free call for splitters with a custom signature.
        attempts = [()] if X is None else [(X, y), ()]
        for args in attempts:
            try:
                return get_n_splits(*args)
            except (TypeError, ValueError):
                continue
        return None

    def set_hyperparameters(self, **kwargs) -> None:
        """
        Set hyperparameters for tracking.

        Parameters
        ----------
        **kwargs
            Hyperparameter key-value pairs
        """
        self.metadata["hyperparameters"] = kwargs

        # Save hyperparameters
        self.reporter.add(kwargs, "experiment/hyperparameters.json")

    def describe_dataset(
        self,
        X: np.ndarray,
        y: np.ndarray,
        feature_names: Optional[List[str]] = None,
        class_names: Optional[List[str]] = None,
    ) -> None:
        """
        Record dataset information.

        Parameters
        ----------
        X : np.ndarray
            Features (indexed as ``X.shape[0]`` samples by ``X.shape[1]``
            features)
        y : np.ndarray
            Labels
        feature_names : List[str], optional
            Feature names
        class_names : List[str], optional
            Class names
        """
        self.metadata["dataset_info"] = {
            "n_samples": X.shape[0],
            "n_features": X.shape[1],
            "n_classes": len(np.unique(y)),
            "class_distribution": pd.Series(y).value_counts().to_dict(),
            "feature_names": feature_names,
            "class_names": class_names,
        }

        # Save dataset info
        self.reporter.add(
            self.metadata["dataset_info"], "experiment/dataset_info.json"
        )

    def run(
        self,
        X: np.ndarray,
        y: np.ndarray,
        feature_names: Optional[List[str]] = None,
        class_names: Optional[List[str]] = None,
        calculate_curves: bool = True,
    ) -> Dict[str, Any]:
        """
        Run complete cross-validation experiment.

        Parameters
        ----------
        X : np.ndarray
            Features
        y : np.ndarray
            Labels
        feature_names : List[str], optional
            Feature names
        class_names : List[str], optional
            Class names
        calculate_curves : bool
            Whether to calculate and plot ROC/PR curves

        Returns
        -------
        Dict[str, Any]
            Dictionary with keys ``paths`` (result of the reporter's
            ``save()``), ``metadata``, ``timing`` and ``models`` (None when
            ``save_models`` is False)
        """
        # Start from a clean slate so that calling run() again does not mix
        # timings and models from an earlier run into this one.
        self.fold_times = []
        self.models = []

        # Record start time
        self.metadata["start_time"] = datetime.now()
        start_time = time.time()

        # Describe dataset
        self.describe_dataset(X, y, feature_names, class_names)

        # Resolve the fold count once, now that data is available
        n_splits = self._count_splits(X, y)
        if n_splits is not None:
            self.metadata["n_folds"] = n_splits
        n_splits_label = "?" if n_splits is None else n_splits

        if self.verbose:
            print("\n" + "=" * 70)
            print(f"CROSS-VALIDATION EXPERIMENT: {self.name}")
            print("=" * 70)
            print(
                f"Dataset: {X.shape[0]} samples, {X.shape[1]} features, {len(np.unique(y))} classes"
            )
            print(f"CV Strategy: {self.cv}")
            # NOTE: builds one throwaway model only to show its class name
            print(f"Model: {self.model_fn().__class__.__name__}")
            print("=" * 70 + "\n")

        # Run cross-validation
        for fold, (train_idx, test_idx) in enumerate(self.cv.split(X, y)):
            fold_start = time.time()

            if self.verbose:
                print(f"\n--- Fold {fold + 1}/{n_splits_label} ---")

            # Split data
            X_train, X_test = X[train_idx], X[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]

            # Train a fresh model for this fold
            model = self.model_fn()

            if self.verbose:
                print(f"Training {model.__class__.__name__}...")

            model.fit(X_train, y_train)

            # Get predictions
            y_pred = model.predict(X_test)

            # Get probabilities if available
            y_proba = None
            if hasattr(model, "predict_proba"):
                y_proba = model.predict_proba(X_test)
            elif hasattr(model, "decision_function"):
                # For SVM and other models with decision function
                y_proba = self._decision_to_proba(
                    model.decision_function(X_test)
                )

            # Calculate all metrics
            metrics = self.reporter.calculate_all_metrics(
                y_true=y_test,
                y_pred=y_pred,
                y_proba=y_proba,
                labels=class_names,
                fold=fold,
                save=True,
                plot=calculate_curves,
            )

            # Save model if requested
            if self.save_models:
                model_path = f"models/fold_{fold:02d}_model.pkl"
                self.reporter.add(model, model_path)
                self.models.append(model)

            # Track timing
            fold_time = time.time() - fold_start
            self.fold_times.append(fold_time)

            if self.verbose:
                # 1-based, matching the fold header printed above
                print(f" Fold {fold + 1} completed in {fold_time:.2f}s")
                print(
                    f" BA: {metrics.get('balanced_accuracy', 0):.3f}, "
                    f"MCC: {metrics.get('mcc', 0):.3f}"
                )
                if "roc_auc" in metrics:
                    # The reporter configuration names this metric
                    # "pre_rec_auc"; accept either key.
                    # TODO confirm which key the reporter actually returns.
                    pr_auc = metrics.get(
                        "pr_auc", metrics.get("pre_rec_auc", 0)
                    )
                    print(
                        f" ROC: {metrics['roc_auc']:.3f}, "
                        f"PR: {pr_auc:.3f}"
                    )

        # Record end time
        self.metadata["end_time"] = datetime.now()
        total_time = time.time() - start_time

        # Save timing information
        mean_fold_time = np.mean(self.fold_times)
        timing_info = {
            "total_time": total_time,
            "mean_fold_time": mean_fold_time,
            "fold_times": self.fold_times,
        }
        self.reporter.add(timing_info, "experiment/timing.json")

        if self.verbose:
            print(f"\n{'='*70}")
            print(f"Experiment completed in {total_time:.2f}s")
            print(f"Mean fold time: {mean_fold_time:.2f}s")
            print(f"{'='*70}\n")

        # Generate final reports
        result_paths = self.reporter.save()

        # Return results
        return {
            "paths": result_paths,
            "metadata": self.metadata,
            "timing": timing_info,
            "models": self.models if self.save_models else None,
        }

    def _decision_to_proba(self, decision: np.ndarray) -> np.ndarray:
        """
        Map decision-function scores to pseudo-probabilities.

        A 1-D score vector (binary case) is treated as the score of the
        second class against a fixed score of 0 for the first class, so
        the softmax reduces to the logistic sigmoid and every row lies in
        [0, 1] and sums to 1. The mapping is monotonic in the score, so
        the ranking of samples is unchanged. A 2-D score matrix
        (multi-class) goes through the softmax directly.
        """
        decision = np.asarray(decision, dtype=float)
        if decision.ndim == 1:
            decision = np.column_stack([np.zeros_like(decision), decision])
        return self._softmax(decision)

    def _softmax(self, x: np.ndarray) -> np.ndarray:
        """Apply a row-wise softmax to decision values."""
        # Subtract the row maximum before exponentiating to avoid overflow
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def get_summary(self) -> pd.DataFrame:
        """Get summary statistics across folds (reporter ``create_summary``)."""
        return self.reporter.create_summary()

    def get_validation_report(self) -> Dict[str, Any]:
        """Get validation report (reporter ``validate_completeness``)."""
        return self.reporter.validate_completeness()
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def quick_experiment(
    X: np.ndarray,
    y: np.ndarray,
    model,
    name: str = "quick_experiment",
    n_folds: int = 5,
    **kwargs,
) -> Dict[str, Any]:
    """
    Run a quick cross-validation experiment.

    This is a convenience function for rapid experimentation.

    Parameters
    ----------
    X : np.ndarray
        Features
    y : np.ndarray
        Labels
    model : sklearn estimator, estimator class, or callable
        One of:
        - an estimator instance: an unfitted copy with the same parameters
          is created for every fold
        - an estimator class: instantiated with its defaults for every fold
        - any other zero-argument callable that returns a model
    name : str
        Experiment name
    n_folds : int
        Number of CV folds
    **kwargs
        Additional arguments for CrossValidationExperiment

    Returns
    -------
    Dict[str, Any]
        Experiment results

    Examples
    --------
    >>> from sklearn.svm import SVC
    >>> results = quick_experiment(X, y, SVC(), name="svm_test")
    >>> print(f"Report saved to: {results['paths']['final_report']}")
    """
    # A class is itself a factory. It must be detected first because it
    # also exposes `fit`/`get_params` as unbound attributes.
    is_factory = isinstance(model, type) or (
        callable(model) and not hasattr(model, "fit")
    )

    # Create model function if needed
    if is_factory:
        model_fn = model
    elif hasattr(model, "get_params"):
        # clone() rebuilds the estimator from its constructor parameters and
        # copes with nested estimators, unlike `cls(**get_params())`, whose
        # deep parameters include `step__param` keys the constructor rejects.
        from sklearn.base import clone

        model_fn = lambda: clone(model)
    else:
        # Estimator-like object outside the scikit-learn parameter protocol:
        # give every fold its own independent copy.
        import copy

        model_fn = lambda: copy.deepcopy(model)

    # Set up cross-validation
    cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

    # Create experiment
    experiment = CrossValidationExperiment(
        name=name, model_fn=model_fn, cv=cv, **kwargs
    )

    # Track hyperparameters if available (instances only: a class exposes
    # `get_params` but cannot be queried without an instance)
    if not is_factory and hasattr(model, "get_params"):
        experiment.set_hyperparameters(**model.get_params())

    # Run experiment
    results = experiment.run(X, y)

    return results
|
|
373
|
+
|
|
374
|
+
# EOF
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Classification utilities with unified API."""
|
|
3
|
+
|
|
4
|
+
# Import reporters
|
|
5
|
+
from .reporters import ClassificationReporter, SingleTaskClassificationReporter
|
|
6
|
+
|
|
7
|
+
# Import other existing modules
|
|
8
|
+
from .Classifier import Classifier
|
|
9
|
+
from .CrossValidationExperiment import CrossValidationExperiment, quick_experiment
|
|
10
|
+
|
|
11
|
+
# Import time series module
|
|
12
|
+
from . import timeseries
|
|
13
|
+
|
|
14
|
+
# Import time series CV utilities from submodule
|
|
15
|
+
from .timeseries import (
|
|
16
|
+
TimeSeriesStratifiedSplit,
|
|
17
|
+
TimeSeriesBlockingSplit,
|
|
18
|
+
TimeSeriesSlidingWindowSplit,
|
|
19
|
+
TimeSeriesCalendarSplit,
|
|
20
|
+
TimeSeriesStrategy,
|
|
21
|
+
TimeSeriesMetadata,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Backward compatibility alias
|
|
25
|
+
CVExperiment = CrossValidationExperiment
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
# Reporters
|
|
29
|
+
"ClassificationReporter",
|
|
30
|
+
"SingleTaskClassificationReporter",
|
|
31
|
+
# Classifier management
|
|
32
|
+
"Classifier",
|
|
33
|
+
# Cross-validation
|
|
34
|
+
"CrossValidationExperiment",
|
|
35
|
+
"CVExperiment", # Alias
|
|
36
|
+
"quick_experiment",
|
|
37
|
+
# Time series module
|
|
38
|
+
"timeseries",
|
|
39
|
+
# Time series CV splitters (re-exported from timeseries module)
|
|
40
|
+
"TimeSeriesStratifiedSplit",
|
|
41
|
+
"TimeSeriesBlockingSplit",
|
|
42
|
+
"TimeSeriesSlidingWindowSplit",
|
|
43
|
+
"TimeSeriesCalendarSplit",
|
|
44
|
+
"TimeSeriesStrategy",
|
|
45
|
+
"TimeSeriesMetadata",
|
|
46
|
+
]
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-22 15:00:10 (ywatanabe)"
|
|
4
|
+
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/ml/classification/reporters/_BaseClassificationReporter.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
Base Classification Reporter - Unified API Interface.
|
|
14
|
+
|
|
15
|
+
This module provides the base class and interface for all classification reporters,
|
|
16
|
+
ensuring consistent APIs and behavior across single-task and multi-task scenarios.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from abc import ABC, abstractmethod
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any, Dict, List, Optional, Union
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
from scitex import logging
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BaseClassificationReporter(ABC):
    """
    Abstract base class for all classification reporters.

    This class defines the unified API that all classification reporters must implement,
    ensuring consistent parameter names, method signatures, and behavior.

    Parameters
    ----------
    output_dir : Union[str, Path], optional
        Base directory for outputs. If None, a timestamped directory
        ``./results/classification_<YYYYmmdd_HHMMSS>`` is used.
    precision : int, default 3
        Number of decimal places for numerical outputs
    verbose : bool, default True
        Whether to log the chosen output directory on initialization
    """

    def __init__(
        self,
        output_dir: Optional[Union[str, Path]] = None,
        precision: int = 3,
        verbose: bool = True,
    ):
        self.precision = precision
        self._dirs_created = False

        # Set default output directory if not provided
        if output_dir is None:
            from datetime import datetime

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_dir = Path(f"./results/classification_{timestamp}")

        self.output_dir = Path(output_dir)

        if verbose:
            logger.info(
                f"Classification reporter initialized (output directory: {str(output_dir)})"
            )

    def _create_subdir_if_needed(self, subdir: str) -> Path:
        """Create ``output_dir / subdir`` (with parents) and return its path."""
        subdir_path = self.output_dir / subdir
        subdir_path.mkdir(parents=True, exist_ok=True)
        return subdir_path

    def _round_numeric(self, value: Any) -> Any:
        """
        Round numeric values to ``self.precision`` decimal places.

        Dicts, lists and tuples are processed recursively; numbers
        (including NumPy scalars) are returned as rounded Python floats;
        everything else is returned unchanged.
        """
        # bool is a subclass of int: without this guard True/False would be
        # silently turned into 1.0/0.0.
        if isinstance(value, bool):
            return value
        if isinstance(value, (int, float, np.integer, np.floating)):
            return round(float(value), self.precision)
        if isinstance(value, dict):
            return {k: self._round_numeric(v) for k, v in value.items()}
        if isinstance(value, (list, tuple)):
            return type(value)(self._round_numeric(v) for v in value)
        return value

    @abstractmethod
    def calculate_metrics(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        y_proba: Optional[np.ndarray] = None,
        labels: Optional[List[str]] = None,
        fold: Optional[int] = None,
        verbose: bool = True,
    ) -> Dict[str, Any]:
        """
        Calculate and save classification metrics.

        This is the unified method signature that all reporters must implement.

        Parameters
        ----------
        y_true : np.ndarray
            True class labels
        y_pred : np.ndarray
            Predicted class labels
        y_proba : np.ndarray, optional
            Prediction probabilities (required for AUC metrics)
        labels : List[str], optional
            Class labels for display
        fold : int, optional
            Fold index for cross-validation
        verbose : bool, default True
            Verbosity flag for the implementation

        Returns
        -------
        Dict[str, Any]
            Dictionary of calculated metrics
        """
        pass

    @abstractmethod
    def get_summary(self) -> Dict[str, Any]:
        """
        Get summary of all calculated metrics.

        Returns
        -------
        Dict[str, Any]
            Summary of metrics across all folds/tasks
        """
        pass

    def get_output_paths(self) -> Dict[str, Path]:
        """
        Get dictionary of output paths for different content types.

        The paths are only computed here; no directory is created.

        Returns
        -------
        Dict[str, Path]
            Mapping of content types ("base", "metrics", "plots", "tables",
            "reports") to their paths under ``output_dir``
        """
        return {
            "base": self.output_dir,
            "metrics": self.output_dir / "metrics",
            "plots": self.output_dir / "plots",
            "tables": self.output_dir / "tables",
            "reports": self.output_dir / "reports",
        }

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(output_dir='{self.output_dir}')"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class ReporterConfig:
    """
    Configuration class for classification reporters.

    This class encapsulates all configuration settings for classification
    reporters, providing a centralized way to manage reporting behavior,
    output formatting, and metric requirements.

    Attributes
    ----------
    precision : int
        Number of decimal places for numerical outputs in reports
    required_metrics : List[str]
        List of metric names that must be calculated and reported

    Examples
    --------
    >>> # Create default configuration
    >>> config = ReporterConfig()
    >>>
    >>> # Create custom configuration with higher precision
    >>> config = ReporterConfig(precision=5)
    >>>
    >>> # Create configuration with specific required metrics
    >>> config = ReporterConfig(
    ...     precision=4,
    ...     required_metrics=["balanced_accuracy", "mcc", "roc_auc"]
    ... )
    >>>
    >>> # Convert to dictionary for serialization
    >>> config_dict = config.to_dict()
    """

    # Kept as an immutable tuple so the defaults cannot be altered through
    # any instance; each instance receives its own list copy.
    DEFAULT_REQUIRED_METRICS = (
        "balanced_accuracy",
        "mcc",
        "confusion_matrix",
        "classification_report",
        "roc_auc",
        "roc_curve",
        "pre_rec_auc",
        "pre_rec_curve",
    )

    def __init__(
        self,
        precision: int = 3,
        required_metrics: Optional[List[str]] = None,
        verbose: bool = True,
    ):
        """
        Initialize ReporterConfig with specified settings.

        Parameters
        ----------
        precision : int, default 3
            Number of decimal places for numerical outputs.

        required_metrics : List[str], optional
            List of metric names that must be calculated and reported.
            If None, defaults to a comprehensive set including:
            - balanced_accuracy: Accuracy adjusted for class imbalance
            - mcc: Matthews Correlation Coefficient
            - confusion_matrix: True/predicted class counts
            - classification_report: Per-class precision/recall/F1
            - roc_auc: Area under ROC curve (binary/multiclass)
            - roc_curve: ROC curve data points
            - pre_rec_auc: Area under Precision-Recall curve
            - pre_rec_curve: PR curve data points

        verbose : bool, default True
            Whether to log the resulting configuration.

        Notes
        -----
        This class only stores the two settings; how ``precision`` and
        ``required_metrics`` are applied is up to the reporter consuming
        the configuration.
        """
        self.precision = precision

        # Set default comprehensive metrics if not provided
        if required_metrics is None:
            required_metrics = self.DEFAULT_REQUIRED_METRICS
        # Copy so that neither the caller's list nor the class-level
        # defaults are shared with this instance.
        self.required_metrics = list(required_metrics)

        if verbose:
            required_metrics_str = "".join(
                f" {required_metric}\n"
                for required_metric in self.required_metrics
            )
            logger.info(
                (
                    f"Config set as:\n"
                    f" precision: {precision}\n"
                    f" required_metrics:\n"
                    f"{required_metrics_str}"
                )
            )

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert configuration to dictionary format.

        Useful for serialization, logging, and saving configuration
        alongside experiment results.

        Returns
        -------
        Dict[str, Any]
            Dictionary containing all configuration parameters with keys:
            - 'precision': int, decimal precision setting
            - 'required_metrics': List[str], required metric names

        Examples
        --------
        >>> config = ReporterConfig(precision=4)
        >>> config_dict = config.to_dict()
        >>> print(config_dict)
        {'precision': 4, 'required_metrics': [...]}
        """
        return {
            "precision": self.precision,
            "required_metrics": self.required_metrics,
        }
|
|
280
|
+
|
|
281
|
+
# EOF
|