scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1056 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-22 03:00:00 (ywatanabe)"
|
|
4
|
+
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/ai/classification/reporter_utils/reporting.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
Report generation utilities for classification reporters.
|
|
14
|
+
|
|
15
|
+
Supports multiple output formats:
|
|
16
|
+
- Org-mode with inline images
|
|
17
|
+
- Markdown with embedded plots
|
|
18
|
+
- LaTeX for academic papers
|
|
19
|
+
- Paper export functionality
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any, Dict, List, Optional, Union
|
|
24
|
+
import numpy as np
|
|
25
|
+
import pandas as pd
|
|
26
|
+
from datetime import datetime
|
|
27
|
+
import yaml
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def generate_org_report(
    results: Dict[str, Any],
    output_path: Union[str, Path],
    include_plots: bool = True,
    verbose: bool = True,
    convert_formats: bool = True,
) -> Path:
    """
    Generate org-mode report with inline images and optional pandoc conversions.

    The report contains, in order: a per-fold dataset table, a summary
    performance table, an optional feature-importance table, optional plot
    galleries (confusion matrices, ROC curves, PR curves), the experiment
    configuration, and a generation footer.

    Parameters
    ----------
    results : Dict[str, Any]
        Classification results dictionary. Keys read here: ``session_config``,
        ``config`` (``n_folds``, ``output_dir``), ``folds``, ``summary`` and
        ``plots``. Missing or ``None`` entries are treated as empty.
    output_path : Union[str, Path]
        Output file path; parent directories are created if needed.
    include_plots : bool, default True
        Whether to include plot images
    verbose : bool, default True
        Whether to log the "generated ..." / conversion-status messages
    convert_formats : bool, default True
        Whether to use pandoc to generate other formats
        (.md, .html, .tex, .docx and, when a LaTeX engine is found, .pdf)

    Returns
    -------
    Path
        Path to generated report
    """
    from scitex.logging import getLogger

    logger = getLogger(__name__)

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Prefer the CONFIG object handed over in memory; only fall back to the
    # CONFIG.yaml on disk when none was passed.
    config_data = _org_session_config_to_dict(results.get('session_config'))
    if config_data is not None:
        logger.info(f"Using session CONFIG from memory with {len(config_data)} keys")
    else:
        config_data = _org_load_config_yaml(results, output_path, logger)

    # Normalise optional inputs once so a key that is present but None is
    # handled the same way as an absent key.
    config = results.get('config') or {}
    folds = results.get('folds') or []
    summary = results.get('summary') or {}
    n_folds = config.get('n_folds')
    if n_folds is None:
        n_folds = len(folds)

    # The report contains non-ASCII text ("±") and pandoc reads its input as
    # UTF-8, so the encoding is pinned instead of depending on the locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        # Header
        f.write("#+TITLE: Classification Results Report\n")
        f.write("#+AUTHOR: SciTeX Classification Reporter\n")
        f.write(f"#+DATE: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("#+OPTIONS: toc:2 num:t\n")
        f.write("#+STARTUP: overview inlineimages\n")
        f.write("#+HTML_HEAD: <style>img { cursor: zoom-in; } img:active { transform: scale(2.5); z-index: 999; position: relative; }</style>\n")
        f.write("#+ATTR_ORG: :width 400\n\n")  # Default width for all images

        # Dataset information (sample sizes) - extracted from folds
        f.write("* Dataset Information\n\n")
        if folds:
            _org_write_dataset_table(f, folds)

        f.write("* Summary Performance\n\n")
        if summary:
            _org_write_summary_table(f, summary, n_folds)

        if 'feature_importance' in summary:
            _org_write_feature_importance(f, summary['feature_importance'])

        if include_plots and 'plots' in results:
            _org_write_visualizations(
                f,
                results['plots'] or {},
                n_folds,
                output_path.parent,
                Path(config.get('output_dir') or '.'),
            )

        # Experiment Configuration section at the end (less prominent)
        logger.info(f"config_data is: {config_data is not None}, type: {type(config_data)}")
        if config_data:
            logger.info(f"Writing CONFIG section with {len(config_data)} keys")
            _org_write_config_table(f, config_data)

        # Footer
        f.write("\n* Report Generation\n\n")
        f.write(f"- Generated at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("- Generated by: SciTeX Classification Reporter\n")
        f.write("- Format: Org-mode\n")

    if verbose:
        logger.info(f"Generated org-mode report: {output_path}")

    # Optional pandoc conversions
    if convert_formats:
        _org_convert_with_pandoc(output_path, verbose, logger)

    return output_path


def _org_session_config_to_dict(session_config: Any) -> Optional[Dict[str, Any]]:
    """Copy the displayable attributes of a session CONFIG object into a dict.

    Returns None when no session config was supplied so the caller can fall
    back to CONFIG.yaml.
    """
    if session_config is None:
        return None

    # Keys to include (uppercase attributes that are not methods)
    useful_keys = ['ID', 'PID', 'START_TIME', 'END_TIME', 'RUN_TIME',
                   'FILE', 'FILE_PATH', 'SDIR', 'SDIR_PATH',
                   'REL_SDIR', 'REL_SDIR_PATH', 'ARGS', 'EXIT_STATUS']

    config_data: Dict[str, Any] = {}
    for key in useful_keys:
        value = getattr(session_config, key, None)
        if value is None:
            continue
        # Plain scalars are kept as-is; everything else is shown via str()
        # (cleaner than repr() in the rendered table).
        if isinstance(value, (str, int, float, bool)):
            config_data[key] = value
        else:
            config_data[key] = str(value)
    return config_data


def _org_load_config_yaml(
    results: Dict[str, Any], output_path: Path, logger: Any
) -> Optional[Dict[str, Any]]:
    """Best-effort load of <session>/CONFIGS/CONFIG.yaml; None if unavailable.

    Any failure is logged and swallowed on purpose: a missing or broken
    CONFIG must not prevent the report itself from being written.
    """
    try:
        # output_path is like:
        #   /path/to/RUNNING/ID/classification_results/reports/report.org
        # so three levels up is the session root that holds CONFIGS/.
        session_dir = output_path.parent.parent.parent
        possible_paths = [session_dir / "CONFIGS" / "CONFIG.yaml"]

        logger.info(f"Looking for CONFIG.yaml from: {output_path}")
        logger.info(f"Output path parent dirs: {output_path.parent}, {output_path.parent.parent}, {output_path.parent.parent.parent}")

        config_path = None
        for path in possible_paths:
            found = path.exists()
            logger.info(f"Checking path: {path}, exists: {found}")
            if found:
                config_path = path
                logger.info(f"Found CONFIG at path: {config_path}")
                break

        # Otherwise derive the session root from the configured output_dir
        # (/path/to/RUNNING/ID/classification_results -> /path/to/RUNNING/ID).
        output_dir = (results.get('config') or {}).get('output_dir')
        if config_path is None and output_dir:
            candidate = Path(output_dir).parent / "CONFIGS" / "CONFIG.yaml"
            found = candidate.exists()
            logger.info(f"Trying session path: {candidate}, exists: {found}")
            if found:
                config_path = candidate

        if config_path is None:
            logger.warning("No CONFIG.yaml found in any expected location")
            return None

        logger.info(f"Found CONFIG at: {config_path}")
        with open(config_path, 'r') as config_file:
            config_data = yaml.safe_load(config_file)

        # safe_load returns None for an empty document and may return a
        # list/scalar; only a mapping can be rendered as a parameter table.
        if not isinstance(config_data, dict):
            logger.warning(f"CONFIG.yaml at {config_path} is empty or not a mapping; ignoring it")
            return None

        logger.info(f"Successfully loaded CONFIG with {len(config_data)} keys")
        return config_data
    except Exception as e:
        # Always log the error for debugging
        logger.warning(f"Could not load CONFIG.yaml: {e}")
        import traceback
        logger.warning(f"Traceback: {traceback.format_exc()}")
        return None


def _org_format_fold_id(fold_id: Any) -> str:
    """Zero-pad integer fold ids; fall back to str() for anything else."""
    try:
        return f"{fold_id:02d}"
    except (ValueError, TypeError):
        # e.g. the '?' placeholder used when a fold has no 'fold_id'
        return str(fold_id)


def _org_write_dataset_table(f: Any, folds: List[Dict[str, Any]]) -> None:
    """Write the per-fold sample-size table ('-' for unknown counts)."""
    f.write("| Fold | Train Total | Train Seizure | Train Interictal | Test Total | Test Seizure | Test Interictal |\n")
    f.write("|------|-------------|---------------|------------------|------------|--------------|-----------------|\n")

    count_keys = ('n_train', 'n_train_seizure', 'n_train_interictal',
                  'n_test', 'n_test_seizure', 'n_test_interictal')
    for fold_data in folds:
        fold_label = _org_format_fold_id(fold_data.get('fold_id', '?'))
        cells = " | ".join(str(fold_data.get(key, '-')) for key in count_keys)
        f.write(f"| {fold_label} | {cells} |\n")
    f.write("\n")


def _org_write_summary_table(f: Any, summary: Dict[str, Any], n_folds: int) -> None:
    """Write the metric table with one column per fold plus mean ± std."""
    header = "| Metric |"
    separator = "|--------|"
    for i in range(n_folds):
        header += f" Fold {i:02d} |"
        separator += "---------|"
    header += " Mean ± Std |"
    separator += "------------|"

    f.write(header + "\n")
    f.write(separator + "\n")

    # Metrics are displayed in this fixed order; others are not shown here.
    metric_display_names = {
        'balanced_accuracy': 'Balanced Accuracy',
        'mcc': 'MCC',
        'roc_auc': 'ROC AUC',
        'pr_auc': 'PR AUC',
    }

    for metric_name, display_name in metric_display_names.items():
        stats = summary.get(metric_name)
        if not (isinstance(stats, dict) and 'mean' in stats):
            continue

        row = f"| {display_name} |"

        # Individual fold values (3 decimals); '-' pads folds without a value
        fold_values = stats.get('values') or []
        for i in range(n_folds):
            if i < len(fold_values):
                row += f" {fold_values[i]:.3f} |"
            else:
                row += " - |"

        mean = stats.get('mean', 0)
        std = stats.get('std', 0)
        row += f" {mean:.3f} ± {std:.3f} |"
        f.write(row + "\n")
    f.write("\n")


def _org_write_feature_importance(f: Any, feature_imp: Dict[str, Any]) -> None:
    """Write the feature-importance section, sorted by mean importance."""
    f.write("* Feature Importance\n\n")
    if 'mean' not in feature_imp:
        return

    f.write("| Feature | Mean | Std | Min | Max | CV |\n")
    f.write("|---------|------|-----|-----|-----|----|\n")

    # Secondary statistics are optional; a missing one is shown as 0.
    std_by_name = feature_imp.get('std') or {}
    min_by_name = feature_imp.get('min') or {}
    max_by_name = feature_imp.get('max') or {}
    cv_by_name = feature_imp.get('cv') or {}

    # Sort by mean importance (descending)
    features_sorted = sorted(
        feature_imp['mean'].items(),
        key=lambda item: item[1],
        reverse=True,
    )
    for feature_name, mean_imp in features_sorted:
        std_imp = std_by_name.get(feature_name, 0)
        min_imp = min_by_name.get(feature_name, 0)
        max_imp = max_by_name.get(feature_name, 0)
        cv_imp = cv_by_name.get(feature_name, 0)
        f.write(f"| {feature_name} | {mean_imp:.3f} | {std_imp:.3f} | "
                f"{min_imp:.3f} | {max_imp:.3f} | {cv_imp:.3f} |\n")
    f.write("\n")

    f.write("*Note:* CV = Coefficient of Variation (std/mean), "
            "indicating stability across folds.\n\n")


def _org_write_plot_cell(f: Any, rel_path: Any, caption: str) -> None:
    """Write one captioned image wrapped in a centred HTML div."""
    f.write("#+BEGIN_EXPORT html\n")
    f.write("<div style='text-align: center;'>\n")
    f.write("#+END_EXPORT\n")
    f.write("#+ATTR_ORG: :width 250\n")
    f.write("#+ATTR_HTML: :width 100% :style max-width:250px\n")
    f.write(f"#+CAPTION: {caption}\n")
    f.write(f"[[file:{rel_path}]]\n")
    f.write("#+BEGIN_EXPORT html\n")
    f.write("</div>\n")
    f.write("#+END_EXPORT\n\n")


def _org_write_plot_gallery(
    f: Any,
    title: str,
    name_variants: tuple,
    plots: Dict[str, Any],
    cv_summary_plots: Dict[str, Any],
    n_folds: int,
    report_dir: Path,
    plots_root: Path,
) -> None:
    """Write one 3-column gallery: CV-summary plot(s) first, then one per fold."""

    def _is_wanted(key: str) -> bool:
        # Support both old (underscore) and new (hyphen) plot naming
        return any(variant in key for variant in name_variants)

    f.write(f"** {title}\n\n")
    f.write("#+BEGIN_EXPORT html\n")
    f.write("<div style='display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; max-width: 100%;'>\n")
    f.write("#+END_EXPORT\n\n")

    for key, plot_path in cv_summary_plots.items():
        if _is_wanted(key):
            rel_path = _make_relative_path(report_dir, plots_root / plot_path)
            _org_write_plot_cell(f, rel_path, "Overall")

    for fold in range(n_folds):
        fold_key = f'fold_{fold:02d}'
        fold_matches = [
            plot_path for key, plot_path in plots.items()
            if fold_key in key and _is_wanted(key)
        ]
        if fold_matches:
            # Take only the first matching plot for this fold
            rel_path = _make_relative_path(report_dir, plots_root / fold_matches[0])
            _org_write_plot_cell(f, rel_path, f"Fold {fold:02d}")

    f.write("#+BEGIN_EXPORT html\n")
    f.write("</div>\n")
    f.write("#+END_EXPORT\n\n")


def _org_write_visualizations(
    f: Any,
    plots: Dict[str, Any],
    n_folds: int,
    report_dir: Path,
    plots_root: Path,
) -> None:
    """Write the Visualizations section (confusion matrix, ROC and PR galleries)."""
    f.write("* Visualizations\n\n")

    cv_summary_plots = {
        key: plot_path for key, plot_path in plots.items()
        if 'cv_summary' in key or 'cv-summary' in key
    }

    galleries = (
        ("Confusion Matrices", ('confusion_matrix', 'confusion-matrix')),
        ("ROC Curves", ('roc_curve', 'roc-curve')),
        ("Precision-Recall Curves", ('pr_curve', 'pr-curve')),
    )
    for title, name_variants in galleries:
        _org_write_plot_gallery(
            f, title, name_variants, plots, cv_summary_plots,
            n_folds, report_dir, plots_root,
        )


def _org_write_config_table(f: Any, config_data: Dict[str, Any]) -> None:
    """Write the experiment configuration as a two-column org table."""
    f.write("* Experiment Configuration\n\n")
    f.write("| Parameter | Value |\n")
    f.write("|-----------|-------|\n")

    # YAML keys are not guaranteed to be strings; compare and format via str()
    # so mixed key types neither break the sort nor the title-casing.
    for key, value in sorted(config_data.items(), key=lambda item: str(item[0])):
        key = str(key)
        display_key = key.replace('_', ' ').title()
        # Escape pipe characters in values for org table
        display_value = str(value).replace('|', '\\vert{}')
        # For long SDIR paths keep only the last, more informative, parts
        if 'SDIR' in key.upper() and len(display_value) > 80:
            parts = display_value.split('/')
            if len(parts) > 4:
                display_value = '.../' + '/'.join(parts[-4:])
        # Wrap in verbatim for paths to avoid formatting issues
        if '/' in display_value or '_' in display_value:
            display_value = f"~{display_value}~"
        f.write(f"| {display_key} | {display_value} |\n")
    f.write("\n")


def _org_convert_with_pandoc(output_path: Path, verbose: bool, logger: Any) -> None:
    """Convert the org report to md/html/tex/docx (and pdf) via pandoc.

    Conversion is best-effort: failures and timeouts are logged (when
    ``verbose``) and never raised.
    """
    import shutil
    import subprocess

    # Check for pandoc in regular PATH or module system
    pandoc_cmd = shutil.which('pandoc')
    if not pandoc_cmd:
        # Site-specific fallback for environments where pandoc is only
        # available through the module system.
        module_pandoc = '/apps/easybuild-2022/easybuild/software/Core/Pandoc/3.1.2/bin/pandoc'
        if os.path.exists(module_pandoc):
            pandoc_cmd = module_pandoc

    if not pandoc_cmd:
        if verbose:
            logger.info("Pandoc not found. Skipping format conversions. Try 'module load Pandoc/3.1.2'")
        return

    def _run(extra_args: List[str], target: Path, format_name: str, timeout: int) -> None:
        # Argument list (no shell) so paths with spaces are passed verbatim.
        cmd = [pandoc_cmd, str(output_path)] + extra_args + ['-o', str(target)]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
        except subprocess.TimeoutExpired:
            if verbose:
                logger.warning(f"{format_name} conversion timed out")
            return
        except Exception as e:
            if verbose:
                logger.warning(f"{format_name} conversion failed: {e}")
            return
        if not verbose:
            return
        if result.returncode == 0:
            logger.info(f"Generated {format_name} report: {target}")
        else:
            logger.warning(f"{format_name} generation failed: {result.stderr}")

    conversions = [
        # (output_filename, extra_args, description)
        (output_path.with_suffix('.md'), [], "markdown"),
        (output_path.with_suffix('.html'), ['--standalone', '--embed-resources'], "HTML"),
        (output_path.with_suffix('.tex'), [], "LaTeX"),
        (output_path.with_suffix('.docx'), ['--resource-path=' + str(output_path.parent.parent)], "DOCX"),
    ]
    for target, extra_args, format_name in conversions:
        _run(extra_args, target, format_name, timeout=30)

    # Convert to PDF (requires LaTeX); xelatex is preferred over pdflatex
    pdf_engine = next(
        (engine for engine in ('xelatex', 'pdflatex') if shutil.which(engine)),
        None,
    )
    if pdf_engine:
        _run([f'--pdf-engine={pdf_engine}'], output_path.with_suffix('.pdf'), "PDF", timeout=60)
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def generate_markdown_report(
    results: Dict[str, Any],
    output_path: Union[str, Path],
    include_plots: bool = True,
    verbose: bool = True,
) -> Path:
    """
    Generate comprehensive markdown report.

    The report contains, in order: a header with the generation timestamp,
    an experiment information list, a summary statistics table, the CV
    summary plots and a per-fold metrics table. Sections whose data is
    missing (or empty) in ``results`` are omitted.

    Parameters
    ----------
    results : Dict[str, Any]
        Classification results dictionary. The keys read here are
        ``'config'``, ``'summary'``, ``'plots'`` and ``'folds'``.
    output_path : Union[str, Path]
        Output file path. Missing parent directories are created.
    include_plots : bool, default True
        Whether to include plot images
    verbose : bool, default True
        Whether to log a message once the report has been written

    Returns
    -------
    Path
        Path to generated report
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8: the summary table contains the non-ASCII "±" character,
    # so relying on the locale's default encoding is not portable.
    with open(output_path, 'w', encoding='utf-8') as f:
        # Header
        f.write("# Classification Report\n\n")
        f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        # Experiment Info
        config = results.get('config', {})
        f.write("## Experiment Information\n\n")
        f.write(f"- **Experiment Name:** {config.get('name', 'Unknown')}\n")
        f.write(f"- **Number of Folds:** {config.get('n_folds', 'N/A')}\n")
        f.write(f"- **Output Directory:** `{config.get('output_dir', 'N/A')}`\n\n")

        # Summary Statistics
        if 'summary' in results and results['summary']:
            f.write("## Summary Statistics\n\n")
            f.write("| Metric | Mean ± Std | Min | Max |\n")
            f.write("|--------|------------|-----|-----|\n")

            for metric_name, stats in results['summary'].items():
                # Only entries shaped like {'mean': ..., ...} become table rows
                if not (isinstance(stats, dict) and 'mean' in stats):
                    continue
                mean = stats.get('mean', 0)
                std = stats.get('std', 0)
                min_val = stats.get('min', 0)
                max_val = stats.get('max', 0)
                metric_display = metric_name.replace('_', ' ').title()
                f.write(f"| {metric_display} | {mean:.3f} ± {std:.3f} | "
                        f"{min_val:.3f} | {max_val:.3f} |\n")
            f.write("\n")

        # CV Summary Results with Plots
        if include_plots and 'plots' in results:
            f.write("## CV Summary Results\n\n")

            # Find cv_summary plots (support both old and new naming)
            cv_summary_plots = {k: v for k, v in results['plots'].items()
                                if 'cv_summary' in k or 'cv-summary' in k}

            if cv_summary_plots:
                # Plot paths are joined onto the experiment output directory
                plots_base_dir = Path(config.get('output_dir', '.'))

                # (underscore key, hyphen key, section title), in output order
                plot_sections = [
                    ('confusion_matrix', 'confusion-matrix',
                     'CV Summary Confusion Matrix'),
                    ('roc_curve', 'roc-curve', 'CV Summary ROC Curve'),
                    ('pr_curve', 'pr-curve',
                     'CV Summary Precision-Recall Curve'),
                ]

                for key_old, key_new, title in plot_sections:
                    section_plots = [v for k, v in cv_summary_plots.items()
                                     if key_old in k or key_new in k]
                    if not section_plots:
                        continue

                    f.write(f"### {title}\n\n")
                    for plot_path in section_plots:
                        rel_path = _make_relative_path(
                            output_path.parent, plots_base_dir / plot_path)
                        # Embed the image; previously the computed path was
                        # dropped and only blank lines were written.
                        f.write(f"![{title}]({rel_path})\n\n")

        # Per-Fold Results (abbreviated for brevity)
        if 'folds' in results and results['folds']:
            f.write("## Per-Fold Results\n\n")

            # Create summary table
            f.write("| Fold | Balanced Accuracy | ROC AUC | PR AUC | MCC |\n")
            f.write("|------|-------------------|---------|--------|-----|\n")

            for fold_data in results['folds']:
                fold_id = fold_data.get('fold_id', 0)

                # Extract metrics; a missing/unreadable metric is shown as N/A
                metric_values = [
                    _extract_metric_value(fold_data.get(key))
                    for key in ('balanced_accuracy', 'roc_auc', 'pr_auc', 'mcc')
                ]
                cells = [f"{value:.3f}" if value is not None else "N/A"
                         for value in metric_values]
                f.write(f"| {fold_id:02d} | " + " | ".join(cells) + " |\n")

            f.write("\n")

    if verbose:
        from scitex.logging import getLogger
        logger = getLogger(__name__)
        logger.info(f"Generated markdown report: {output_path}")

    return output_path
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def generate_latex_report(
    results: Dict[str, Any],
    output_path: Union[str, Path],
    verbose: bool = True,
) -> Path:
    """
    Generate LaTeX report for academic papers.

    Writes a standalone ``article`` document with an experiment information
    list, a summary statistics table (when ``results['summary']`` is
    non-empty) and one figure per CV summary plot (when ``results['plots']``
    is present).

    Parameters
    ----------
    results : Dict[str, Any]
        Classification results dictionary. The keys read here are
        ``'config'``, ``'summary'`` and ``'plots'``.
    output_path : Union[str, Path]
        Output file path. Missing parent directories are created.
    verbose : bool, default True
        Whether to log a message once the report has been written

    Returns
    -------
    Path
        Path to generated report
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8 so non-ASCII experiment names do not depend on the
    # locale's default encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        # Document setup
        f.write("\\documentclass[11pt]{article}\n")
        f.write("\\usepackage{booktabs}\n")
        f.write("\\usepackage{graphicx}\n")
        f.write("\\usepackage{float}\n")
        f.write("\\usepackage{amsmath}\n")
        f.write("\\usepackage{hyperref}\n\n")

        f.write("\\title{Classification Report}\n")
        f.write("\\date{\\today}\n\n")

        f.write("\\begin{document}\n")
        f.write("\\maketitle\n\n")

        # Experiment Information
        config = results.get('config', {})
        f.write("\\section{Experiment Information}\n\n")
        f.write("\\begin{itemize}\n")
        f.write(f"\\item \\textbf{{Experiment Name:}} {_latex_escape(config.get('name', 'Unknown'))}\n")
        f.write(f"\\item \\textbf{{Number of Folds:}} {config.get('n_folds', 'N/A')}\n")
        f.write(f"\\item \\textbf{{Output Directory:}} \\texttt{{{_latex_escape(str(config.get('output_dir', 'N/A')))}}}\n")
        f.write("\\end{itemize}\n\n")

        # Summary Statistics
        if 'summary' in results and results['summary']:
            f.write("\\section{Summary Statistics}\n\n")
            f.write("\\begin{table}[H]\n")
            f.write("\\centering\n")
            f.write("\\begin{tabular}{lccc}\n")
            f.write("\\toprule\n")
            f.write("Metric & Mean $\\pm$ Std & Min & Max \\\\\n")
            f.write("\\midrule\n")

            for metric_name, stats in results['summary'].items():
                # Only entries shaped like {'mean': ..., ...} become table rows
                if not (isinstance(stats, dict) and 'mean' in stats):
                    continue
                mean = stats.get('mean', 0)
                std = stats.get('std', 0)
                min_val = stats.get('min', 0)
                max_val = stats.get('max', 0)
                metric_display = metric_name.replace('_', ' ').title()
                f.write(f"{_latex_escape(metric_display)} & "
                        f"${mean:.3f} \\pm {std:.3f}$ & "
                        f"{min_val:.3f} & {max_val:.3f} \\\\\n")

            f.write("\\bottomrule\n")
            f.write("\\end{tabular}\n")
            f.write("\\caption{Cross-validation performance metrics}\n")
            f.write("\\label{tab:cv_metrics}\n")
            f.write("\\end{table}\n\n")

        # Plots (if available)
        if 'plots' in results:
            f.write("\\section{Visualizations}\n\n")

            # CV Summary plots (support both old and new naming)
            cv_summary_plots = {k: v for k, v in results['plots'].items()
                                if 'cv_summary' in k or 'cv-summary' in k}

            if cv_summary_plots:
                # Plot paths are joined onto the experiment output directory
                plots_base_dir = Path(config.get('output_dir', '.'))

                # Find specific plot types (support both old underscore and
                # new hyphen naming)
                for plot_type, plot_type_alt, title in [
                    ('confusion_matrix', 'confusion-matrix', 'CV Summary Confusion Matrix'),
                    ('roc_curve', 'roc-curve', 'CV Summary ROC Curve'),
                    ('pr_curve', 'pr-curve', 'CV Summary Precision-Recall Curve'),
                ]:
                    type_plots = [v for k, v in cv_summary_plots.items()
                                  if (plot_type in k or plot_type_alt in k)]
                    if not type_plots:
                        continue

                    f.write(f"\\subsection{{{title}}}\n\n")
                    for index, plot_path in enumerate(type_plots, start=1):
                        rel_path = _make_relative_path(
                            output_path.parent, plots_base_dir / plot_path)
                        # LaTeX expects forward slashes in graphics paths;
                        # as_posix() is a no-op for POSIX-style paths.
                        graphics_path = Path(rel_path).as_posix()

                        # The first figure keeps the historical label; later
                        # figures of the same type get a numeric suffix so
                        # labels are never multiply defined.
                        label = f"fig:{plot_type}_cv_summary"
                        if index > 1:
                            label = f"{label}_{index}"

                        f.write("\\begin{figure}[H]\n")
                        f.write("\\centering\n")
                        f.write(f"\\includegraphics[width=0.8\\textwidth]{{{graphics_path}}}\n")
                        f.write(f"\\caption{{{title}}}\n")
                        f.write(f"\\label{{{label}}}\n")
                        f.write("\\end{figure}\n\n")

        f.write("\\end{document}\n")

    if verbose:
        from scitex.logging import getLogger
        logger = getLogger(__name__)
        logger.info(f"Generated LaTeX report: {output_path}")

    return output_path
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
def create_summary_statistics(results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Aggregate per-fold metrics into summary statistics.

    Parameters
    ----------
    results : Dict[str, Any]
        Classification results. Only ``results['folds']`` is read; each fold
        is expected to be a mapping from metric name to metric data.

    Returns
    -------
    Dict[str, Any]
        One entry per metric that had at least one usable value, holding
        ``'mean'``, ``'std'``, ``'min'``, ``'max'``, ``'median'`` and the
        raw ``'values'`` list. Empty when ``results`` has no ``'folds'`` key.
    """
    aggregated: Dict[str, Any] = {}

    if 'folds' not in results:
        return aggregated

    tracked_metrics = (
        'balanced_accuracy', 'roc_auc', 'pr_auc', 'mcc',
        'precision', 'recall', 'f1_score',
    )

    for name in tracked_metrics:
        # Pull this metric out of every fold that reports it
        extracted = (
            _extract_metric_value(fold[name])
            for fold in results['folds']
            if name in fold
        )
        usable = [number for number in extracted if number is not None]

        # Metrics without a single usable value are left out entirely
        if not usable:
            continue

        samples = np.array(usable)
        aggregated[name] = {
            'mean': float(np.mean(samples)),
            'std': float(np.std(samples)),
            'min': float(np.min(samples)),
            'max': float(np.max(samples)),
            'median': float(np.median(samples)),
            'values': samples.tolist(),
        }

    return aggregated
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def export_for_paper(
    results: Dict[str, Any],
    output_dir: Union[str, Path],
    verbose: bool = True,
) -> Dict[str, Path]:
    """
    Export results in formats suitable for academic papers.

    Depending on which keys are present in ``results`` this writes a LaTeX
    metrics table and a CSV summary (``'summary'``), a JSON dump of the whole
    results dictionary (always), a confusion matrix CSV
    (``'overall_confusion_matrix'``) and copies of the CV summary plots into
    a ``figures`` sub-directory (``'plots'``).

    Parameters
    ----------
    results : Dict[str, Any]
        Classification results
    output_dir : Union[str, Path]
        Output directory for exports. Created if it does not exist.
    verbose : bool, default True
        Whether to log a message once the export has finished

    Returns
    -------
    Dict[str, Path]
        Paths to exported files, keyed by ``'metrics_table'``,
        ``'summary_table'``, ``'raw_results'``, ``'confusion_matrix'`` and
        ``'figure_<name>'`` for each copied plot.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    exported_files = {}

    # Export summary metrics as LaTeX table
    if 'summary' in results:
        latex_table_path = output_dir / "metrics_table.tex"
        _export_metrics_table_latex(results['summary'], latex_table_path)
        exported_files['metrics_table'] = latex_table_path

        # Also export as CSV for easier processing
        csv_table_path = output_dir / "summary_table.csv"
        _export_summary_table_csv(results['summary'], csv_table_path)
        exported_files['summary_table'] = csv_table_path

    # Export raw results as JSON
    import json
    raw_results_path = output_dir / "raw_results.json"
    with open(raw_results_path, 'w') as f:
        # Create serializable version of results
        serializable_results = _make_serializable(results)
        json.dump(serializable_results, f, indent=2)
    exported_files['raw_results'] = raw_results_path

    # Export confusion matrix as CSV
    if 'overall_confusion_matrix' in results:
        cm_path = output_dir / "confusion_matrix.csv"
        cm_data = np.array(results['overall_confusion_matrix'])
        cm_df = pd.DataFrame(cm_data)
        cm_df.to_csv(cm_path, index=True)
        exported_files['confusion_matrix'] = cm_path

    # Copy key plots; plot paths are joined onto the experiment output dir
    base_dir = Path(results.get('config', {}).get('output_dir', '.'))

    if 'plots' in results:
        import shutil

        plots_dir = output_dir / "figures"
        plots_dir.mkdir(exist_ok=True)

        # Copy cv_summary plots with standardized names. Both the old
        # underscore and the new hyphen key naming are accepted, matching
        # the markdown and LaTeX report generators.
        cv_summary_plots = {k: v for k, v in results['plots'].items()
                            if 'cv_summary' in k or 'cv-summary' in k}

        # (accepted key fragments, standardized destination filename)
        standardized_names = [
            (('confusion_matrix', 'confusion-matrix'),
             "confusion_matrix_cv_summary.jpg"),
            (('roc_curve', 'roc-curve'), "roc_curve_cv_summary.jpg"),
            (('pr_curve', 'pr-curve'), "pr_curve_cv_summary.jpg"),
        ]

        for plot_key, plot_path in cv_summary_plots.items():
            src_path = base_dir / plot_path
            # Plots listed in the results but missing on disk are skipped
            if not src_path.exists():
                continue

            # Standardize filename; unrecognized plots keep their own name
            dest_name = src_path.name
            for fragments, standard_name in standardized_names:
                if any(fragment in plot_key for fragment in fragments):
                    dest_name = standard_name
                    break

            dest_path = plots_dir / dest_name
            shutil.copy2(src_path, dest_path)
            exported_files[f"figure_{dest_name.split('.')[0]}"] = dest_path

    if verbose:
        from scitex.logging import getLogger
        logger = getLogger(__name__)
        logger.info(f"Exported {len(exported_files)} files for paper to {output_dir}")

    return exported_files
|
|
913
|
+
|
|
914
|
+
|
|
915
|
+
# Helper functions
|
|
916
|
+
|
|
917
|
+
def _extract_metric_value(metric_data: Any) -> Optional[float]:
    """
    Extract numeric value from metric data.

    Parameters
    ----------
    metric_data : Any
        Either a plain number (``int``, ``float`` or numpy scalar), a dict
        carrying the number under the ``'value'`` key, or ``None``.

    Returns
    -------
    Optional[float]
        The metric as a ``float``, or ``None`` when ``metric_data`` is
        ``None``, wraps a ``None`` value, or has an unsupported type.
    """
    if metric_data is None:
        return None

    if isinstance(metric_data, dict) and 'value' in metric_data:
        wrapped = metric_data['value']
        # A metric that could not be computed may be stored as
        # {'value': None}; report it as missing instead of letting
        # float(None) raise TypeError.
        return None if wrapped is None else float(wrapped)

    if isinstance(metric_data, (int, float, np.number)):
        return float(metric_data)

    return None
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
def _make_relative_path(from_dir: Path, to_path: Path) -> str:
    """
    Create relative path from one directory to another.

    Parameters
    ----------
    from_dir : Path
        Directory the returned path should be relative to.
    to_path : Path
        Target path.

    Returns
    -------
    str
        ``to_path`` expressed relative to ``from_dir``. When ``to_path`` lies
        below ``from_dir`` this is the plain sub-path; otherwise it is built
        from ``..`` segments joined with ``/``. If the two paths do not share
        an anchor (root/drive), ``to_path`` is returned as a string unchanged.
    """
    from_dir = Path(from_dir)
    to_path = Path(to_path)

    # Fast path: the target lives underneath from_dir
    try:
        return str(to_path.relative_to(from_dir))
    except ValueError:
        pass

    # An absolute and a relative path have no comparable components (the
    # root would be counted as a directory to climb out of), so anchor both
    # at the current working directory first.
    if from_dir.is_absolute() != to_path.is_absolute():
        from_dir = from_dir.absolute()
        to_path = to_path.absolute()

    # Different roots/drives: no relative path exists, fall back to the target
    if from_dir.anchor != to_path.anchor:
        return str(to_path)

    from_parts = from_dir.parts
    to_parts = to_path.parts

    # Length of the shared leading prefix
    common_parts = 0
    for from_part, to_part in zip(from_parts, to_parts):
        if from_part != to_part:
            break
        common_parts += 1

    # Climb out of the non-shared part of from_dir, then descend to the target
    ups = len(from_parts) - common_parts
    rel_parts = ['..'] * ups + list(to_parts[common_parts:])
    return '/'.join(rel_parts)
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
def _latex_escape(text: str) -> str:
    """
    Escape special LaTeX characters.

    Parameters
    ----------
    text : str
        Text to escape. Non-string input is converted with ``str()`` first.

    Returns
    -------
    str
        ``text`` with ``\\ { } $ & % # _ ~ ^`` replaced by their LaTeX
        escape sequences.
    """
    if not isinstance(text, str):
        text = str(text)

    replacements = {
        '\\': '\\textbackslash{}',
        '{': '\\{',
        '}': '\\}',
        '$': '\\$',
        '&': '\\&',
        '%': '\\%',
        '#': '\\#',
        '_': '\\_',
        '~': '\\textasciitilde{}',
        '^': '\\textasciicircum{}'
    }

    # Translate in a single pass. Chained str.replace calls would re-escape
    # the braces of an already inserted "\textbackslash{}", turning it into
    # "\textbackslash\{\}".
    return text.translate(str.maketrans(replacements))
|
|
979
|
+
|
|
980
|
+
|
|
981
|
+
def _export_metrics_table_latex(
    summary: Dict[str, Any],
    output_path: Path
) -> None:
    """
    Write the summary metrics to ``output_path`` as a LaTeX ``table``.

    Each ``summary`` entry that is a dict containing a ``'mean'`` key becomes
    one row (metric name, mean ± std, min, max); other entries are ignored.
    """
    opening_lines = (
        "% Classification metrics summary table\n",
        "\\begin{table}[htbp]\n",
        "\\centering\n",
        "\\begin{tabular}{lccc}\n",
        "\\toprule\n",
        "Metric & Mean $\\pm$ Std & Min & Max \\\\\n",
        "\\midrule\n",
    )
    closing_lines = (
        "\\bottomrule\n",
        "\\end{tabular}\n",
        "\\caption{Classification performance metrics}\n",
        "\\label{tab:metrics}\n",
        "\\end{table}\n",
    )

    with open(output_path, 'w') as handle:
        handle.writelines(opening_lines)

        for key, entry in summary.items():
            # Skip anything that is not a statistics dictionary
            if not isinstance(entry, dict) or 'mean' not in entry:
                continue

            avg = entry.get('mean', 0)
            spread = entry.get('std', 0)
            lowest = entry.get('min', 0)
            highest = entry.get('max', 0)
            label = _latex_escape(key.replace('_', ' ').title())

            row = (
                f"{label} & "
                f"${avg:.3f} \\pm {spread:.3f}$ & "
                f"{lowest:.3f} & {highest:.3f} \\\\\n"
            )
            handle.write(row)

        handle.writelines(closing_lines)
|
|
1011
|
+
|
|
1012
|
+
|
|
1013
|
+
def _export_summary_table_csv(
|
|
1014
|
+
summary: Dict[str, Any],
|
|
1015
|
+
output_path: Path
|
|
1016
|
+
) -> None:
|
|
1017
|
+
"""Export summary metrics as CSV table."""
|
|
1018
|
+
# Create DataFrame from summary
|
|
1019
|
+
data = []
|
|
1020
|
+
for metric_name, stats in summary.items():
|
|
1021
|
+
if isinstance(stats, dict) and 'mean' in stats:
|
|
1022
|
+
row = {
|
|
1023
|
+
'Metric': metric_name.replace('_', ' ').title(),
|
|
1024
|
+
'Mean': stats.get('mean', 0),
|
|
1025
|
+
'Std': stats.get('std', 0),
|
|
1026
|
+
'Min': stats.get('min', 0),
|
|
1027
|
+
'Max': stats.get('max', 0)
|
|
1028
|
+
}
|
|
1029
|
+
data.append(row)
|
|
1030
|
+
|
|
1031
|
+
if data:
|
|
1032
|
+
df = pd.DataFrame(data)
|
|
1033
|
+
df.to_csv(output_path, index=False)
|
|
1034
|
+
|
|
1035
|
+
|
|
1036
|
+
def _make_serializable(obj: Any) -> Any:
    """
    Convert numpy arrays and other non-serializable objects to serializable format.

    Dicts, lists and tuples are converted recursively (tuples become lists,
    which is how JSON represents them anyway). Objects of any other type are
    returned unchanged.

    Parameters
    ----------
    obj : Any
        Object to convert.

    Returns
    -------
    Any
        A structure built from plain Python containers and scalars wherever
        a conversion is known.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.bool_):
        # np.bool_ is not accepted by the json module
        return bool(obj)
    if isinstance(obj, np.integer):
        # Keep integers integral instead of coercing them to float
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, pd.DataFrame):
        # Convert DataFrame to dict with orient='list' for JSON serialization
        return obj.to_dict(orient='list')
    if isinstance(obj, pd.Series):
        return obj.tolist()
    if isinstance(obj, dict):
        return {k: _make_serializable(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_make_serializable(item) for item in obj]
    if isinstance(obj, Path):
        return str(obj)
    return obj
|
|
1055
|
+
|
|
1056
|
+
# EOF
|