scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-22 17:10:00 (ywatanabe)"
|
|
4
|
+
# File: _TimeSeriesBlockingSplit.py
|
|
5
|
+
|
|
6
|
+
__FILE__ = "_TimeSeriesBlockingSplit.py"
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
Functionalities:
|
|
10
|
+
- Implements time series split with blocking for multiple subjects/groups
|
|
11
|
+
- Ensures temporal integrity within each subject's timeline
|
|
12
|
+
- Allows cross-subject generalization while preventing data leakage
|
|
13
|
+
- Provides visualization with scatter plots and subject color coding
|
|
14
|
+
- Validates that no data mixing occurs between subjects
|
|
15
|
+
- Supports expanding window approach for more training data in later folds
|
|
16
|
+
|
|
17
|
+
Dependencies:
|
|
18
|
+
- packages:
|
|
19
|
+
- numpy
|
|
20
|
+
- sklearn
|
|
21
|
+
- matplotlib
|
|
22
|
+
- scitex
|
|
23
|
+
|
|
24
|
+
IO:
|
|
25
|
+
- input-files:
|
|
26
|
+
- None (generates synthetic multi-subject data for demonstration)
|
|
27
|
+
- output-files:
|
|
28
|
+
- ./blocking_splits_demo.png (visualization with scatter plots)
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
"""Imports"""
|
|
32
|
+
import os
|
|
33
|
+
import sys
|
|
34
|
+
import argparse
|
|
35
|
+
import numpy as np
|
|
36
|
+
from typing import Iterator, Optional, Tuple
|
|
37
|
+
from sklearn.model_selection import BaseCrossValidator
|
|
38
|
+
import matplotlib.pyplot as plt
|
|
39
|
+
import matplotlib.patches as patches
|
|
40
|
+
import scitex as stx
|
|
41
|
+
from scitex import logging
|
|
42
|
+
|
|
43
|
+
logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class TimeSeriesBlockingSplit(BaseCrossValidator):
|
|
47
|
+
"""
|
|
48
|
+
Time series split with blocking to handle multiple subjects/groups.
|
|
49
|
+
|
|
50
|
+
This splitter ensures temporal integrity within each subject while allowing
|
|
51
|
+
cross-subject generalization. Each subject's data is kept temporally coherent,
|
|
52
|
+
but subjects can appear in both training and test sets at different time periods.
|
|
53
|
+
|
|
54
|
+
Key Features:
|
|
55
|
+
- Temporal order preserved within each subject
|
|
56
|
+
- No data leakage within individual subject timelines
|
|
57
|
+
- Expanding window approach: more training data in later folds
|
|
58
|
+
- Cross-subject generalization: subjects can be in both train and test
|
|
59
|
+
|
|
60
|
+
Use Cases:
|
|
61
|
+
- Multiple patients with longitudinal medical data
|
|
62
|
+
- Multiple stocks with time series financial data
|
|
63
|
+
- Multiple sensors with temporal measurements
|
|
64
|
+
- Any scenario with grouped time series data
|
|
65
|
+
|
|
66
|
+
Parameters
|
|
67
|
+
----------
|
|
68
|
+
n_splits : int, default=5
|
|
69
|
+
Number of splits (folds)
|
|
70
|
+
test_ratio : float, default=0.2
|
|
71
|
+
Proportion of data for test set per subject
|
|
72
|
+
|
|
73
|
+
Examples
|
|
74
|
+
--------
|
|
75
|
+
>>> from scitex.ml.classification import TimeSeriesBlockingSplit
|
|
76
|
+
>>> import numpy as np
|
|
77
|
+
>>>
|
|
78
|
+
>>> # Create data: 100 samples, 4 subjects (25 samples each)
|
|
79
|
+
>>> X = np.random.randn(100, 10)
|
|
80
|
+
>>> y = np.random.randint(0, 2, 100)
|
|
81
|
+
>>> timestamps = np.arange(100)
|
|
82
|
+
>>> groups = np.repeat([0, 1, 2, 3], 25) # Subject IDs
|
|
83
|
+
>>>
|
|
84
|
+
>>> # Each subject gets temporal split: early samples → train, later → test
|
|
85
|
+
>>> splitter = TimeSeriesBlockingSplit(n_splits=3, test_ratio=0.3)
|
|
86
|
+
>>> for train_idx, test_idx in splitter.split(X, y, timestamps, groups):
|
|
87
|
+
... train_subjects = set(groups[train_idx])
|
|
88
|
+
... test_subjects = set(groups[test_idx])
|
|
89
|
+
... print(f"Train subjects: {train_subjects}, Test subjects: {test_subjects}")
|
|
90
|
+
... # Output shows same subjects in both sets but different time periods
|
|
91
|
+
"""
|
|
92
|
+
|
|
93
|
+
def __init__(
    self,
    n_splits: int = 5,
    test_ratio: float = 0.2,
    val_ratio: float = 0.0,
    random_state: Optional[int] = None,
):
    """
    Store the splitter configuration.

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds produced by ``split`` / ``split_with_val``
    test_ratio : float, default=0.2
        Fraction of each group's samples used as the test window
    val_ratio : float, default=0.0
        Fraction of each group's samples used as the validation window
        (only read by ``split_with_val``)
    random_state : int, optional
        Seed handed to ``np.random.default_rng``; the resulting generator
        is stored as ``self.rng``
    """
    # Build the generator first; the plain attribute copies are independent.
    self.rng = np.random.default_rng(random_state)
    self.random_state = random_state
    self.val_ratio = val_ratio
    self.test_ratio = test_ratio
    self.n_splits = n_splits
|
|
99
|
+
|
|
100
|
+
def split(
    self,
    X: np.ndarray,
    y: Optional[np.ndarray] = None,
    timestamps: Optional[np.ndarray] = None,
    groups: Optional[np.ndarray] = None,
) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    """
    Generate train/test indices respecting group boundaries.

    Every group is ordered by its own timestamps and cut into an early
    training part and a later test window. The training part grows from
    fold to fold (expanding window); the last fold uses the final
    ``test_ratio`` share of each group as its test window.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data (not inspected by this method)
    y : array-like, shape (n_samples,)
        Target variable (not inspected by this method)
    timestamps : array-like, shape (n_samples,)
        Timestamps for temporal ordering (required)
    groups : array-like, shape (n_samples,)
        Group labels (e.g., patient IDs) - required

    Yields
    ------
    train : ndarray
        Training set indices (integer dtype, also when empty)
    test : ndarray
        Test set indices (integer dtype, also when empty)

    Raises
    ------
    ValueError
        If ``groups`` or ``timestamps`` is missing, or if their lengths differ
    """
    if groups is None:
        raise ValueError("groups must be provided for blocking time series split")

    if timestamps is None:
        raise ValueError("timestamps must be provided")

    # Accept any array-like (lists, pandas Series, ...): the boolean masks
    # below only work on real ndarrays.
    groups = np.asarray(groups)
    timestamps = np.asarray(timestamps)
    if len(groups) != len(timestamps):
        raise ValueError(
            f"groups and timestamps must have the same length "
            f"(got {len(groups)} and {len(timestamps)})"
        )

    # The time-ordered sample indices of each group do not depend on the
    # fold, so compute them once. A stable sort keeps tied timestamps in
    # their original order, which makes the folds deterministic.
    ordered_per_group = []
    for label in np.unique(groups):
        member_idx = np.flatnonzero(groups == label)
        order = np.argsort(timestamps[member_idx], kind="stable")
        ordered_per_group.append(member_idx[order])

    for fold in range(self.n_splits):
        train_parts = []
        test_parts = []

        for ordered in ordered_per_group:
            n_group = len(ordered)
            test_size = int(n_group * self.test_ratio)
            train_size = n_group - test_size

            # Expanding window: earlier folds stop training sooner, in
            # steps of test_size // n_splits samples.
            step = test_size // self.n_splits
            split_point = train_size - (self.n_splits - fold - 1) * step
            # Keep at least one training sample and never eat into the
            # final test window.
            split_point = max(1, min(split_point, train_size))

            train_parts.append(ordered[:split_point])
            test_parts.append(ordered[split_point:split_point + test_size])

        # Concatenating index arrays keeps an integer dtype even when a
        # part is empty, so the result is always usable for indexing.
        train = np.concatenate(train_parts) if train_parts else np.empty(0, dtype=np.intp)
        test = np.concatenate(test_parts) if test_parts else np.empty(0, dtype=np.intp)
        yield train, test
|
|
162
|
+
|
|
163
|
+
def split_with_val(
    self,
    X: np.ndarray,
    y: Optional[np.ndarray] = None,
    timestamps: Optional[np.ndarray] = None,
    groups: Optional[np.ndarray] = None,
) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Generate train/validation/test indices respecting group boundaries.

    Every group is ordered by its own timestamps and cut, in temporal
    order, into a training part, a validation window of
    ``int(n_group * val_ratio)`` samples and a test window of up to
    ``int(n_group * test_ratio)`` samples. The training part grows from
    fold to fold (expanding window).

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data (not inspected by this method)
    y : array-like, shape (n_samples,)
        Target variable (not inspected by this method)
    timestamps : array-like, shape (n_samples,)
        Timestamps for temporal ordering (required)
    groups : array-like, shape (n_samples,)
        Group labels (e.g., patient IDs) - required

    Yields
    ------
    train : ndarray
        Training set indices (integer dtype, also when empty)
    val : ndarray
        Validation set indices (empty when ``val_ratio`` is not positive)
    test : ndarray
        Test set indices (integer dtype, also when empty)

    Raises
    ------
    ValueError
        If ``groups`` or ``timestamps`` is missing, or if their lengths differ
    """
    if groups is None:
        raise ValueError("groups must be provided for blocking time series split")

    if timestamps is None:
        raise ValueError("timestamps must be provided")

    # Accept any array-like (lists, pandas Series, ...): the boolean masks
    # below only work on real ndarrays.
    groups = np.asarray(groups)
    timestamps = np.asarray(timestamps)
    if len(groups) != len(timestamps):
        raise ValueError(
            f"groups and timestamps must have the same length "
            f"(got {len(groups)} and {len(timestamps)})"
        )

    # Time-ordered indices per group are fold-independent: compute once.
    # A stable sort keeps tied timestamps in their original order.
    ordered_per_group = []
    for label in np.unique(groups):
        member_idx = np.flatnonzero(groups == label)
        order = np.argsort(timestamps[member_idx], kind="stable")
        ordered_per_group.append(member_idx[order])

    for fold in range(self.n_splits):
        train_parts = []
        val_parts = []
        test_parts = []

        for ordered in ordered_per_group:
            n_group = len(ordered)
            test_size = int(n_group * self.test_ratio)
            val_size = int(n_group * self.val_ratio) if self.val_ratio > 0 else 0
            train_size = n_group - test_size - val_size

            # Expanding window: earlier folds stop training sooner, in
            # steps of test_size // n_splits samples.
            step = test_size // self.n_splits
            split_point_train = train_size - (self.n_splits - fold - 1) * step
            split_point_train = max(1, min(split_point_train, train_size))

            # Validation directly follows training, test follows validation.
            val_start = split_point_train
            test_start = val_start + val_size
            # Shrink the test window if it would run past the group's end.
            test_stop = min(test_start + test_size, n_group)

            train_parts.append(ordered[:split_point_train])
            val_parts.append(ordered[val_start:test_start])
            test_parts.append(ordered[test_start:test_stop])

        # Concatenating index arrays keeps an integer dtype even when a
        # part is empty, so every result is usable for indexing.
        empty = np.empty(0, dtype=np.intp)
        train = np.concatenate(train_parts) if train_parts else empty
        val = np.concatenate(val_parts) if val_parts else empty.copy()
        test = np.concatenate(test_parts) if test_parts else empty.copy()
        yield train, val, test
|
|
242
|
+
|
|
243
|
+
def get_n_splits(self, X=None, y=None, groups=None):
    """Return the configured number of folds; ``X``, ``y`` and ``groups`` are ignored."""
    n_folds = self.n_splits
    return n_folds
|
|
246
|
+
|
|
247
|
+
def plot_splits(self, X, y=None, timestamps=None, groups=None, figsize=(12, 6), save_path=None):
    """
    Visualize the blocking splits showing subject separation.

    One row is drawn per fold. For every subject present in a fold's
    training or test set, a rectangle spans that subject's sample-index
    range, and the individual samples are overlaid as vertically
    jittered points.

    Color Scheme:
    - Training rectangles: blue border, filled with the subject's color
    - Test rectangles: red border, filled with light coral
    - Training points: dark blue circles; test points: dark red squares
    - Subject colors are taken from the Set3 colormap by the subject's
      position among the sorted unique group labels (same mapping as
      the legend)

    Parameters
    ----------
    X : array-like
        Training data (only its length is used, for the x-axis limit)
    y : array-like, optional
        Target variable (passed through to ``split``)
    timestamps : array-like, optional
        Timestamps (if None, uses sample indices)
    groups : array-like
        Group labels (required for blocking split) - each unique value represents a subject
    figsize : tuple, default (12, 6)
        Figure size
    save_path : str, optional
        Path to save the plot

    Returns
    -------
    fig : matplotlib.figure.Figure
        The created figure with a legend explaining borders and subject colors

    Raises
    ------
    ValueError
        If ``groups`` is None or no splits are generated

    Examples
    --------
    >>> splitter = TimeSeriesBlockingSplit(n_splits=3)
    >>> fig = splitter.plot_splits(X, timestamps=timestamps, groups=subject_ids)
    >>> fig.show()  # Will show train (blue border) vs test (red border) by subject
    """
    from matplotlib.lines import Line2D

    if groups is None:
        raise ValueError("groups must be provided for blocking split visualization")

    # Index arithmetic below needs a real ndarray, not a list.
    groups = np.asarray(groups)

    # Apply the fallback BEFORE splitting: split() rejects timestamps=None,
    # so doing this afterwards would make the fallback unreachable.
    if timestamps is None:
        timestamps = np.arange(len(X))

    # Get all splits
    splits = list(self.split(X, y, timestamps, groups))
    if not splits:
        raise ValueError("No splits generated")

    # One color per subject, keyed by position in the sorted unique labels
    # so that labels of any type (strings, non-contiguous ints) work and
    # rectangles agree with the legend.
    unique_groups = np.unique(groups)
    palette = plt.cm.Set3(np.linspace(0, 1, len(unique_groups)))

    # Create figure
    fig, ax = plt.subplots(figsize=figsize)

    # Local generator: same jitter values as seeding the global RNG with 42,
    # but without clobbering the caller's global random state.
    jitter_rng = np.random.RandomState(42)
    jitter_strength = 0.15  # Amount of vertical jittering

    def _draw_subset(fold, indices, kind):
        """Draw per-subject rectangles and jittered points for one subset of a fold."""
        is_train = kind == "Train"
        # Guard against empty float arrays, which cannot be used as indices.
        indices = np.asarray(indices, dtype=np.intp)
        if indices.size == 0:
            return

        jitter = jitter_rng.normal(0, jitter_strength, len(indices))
        subset_groups = groups[indices]
        points_labelled = False

        for pos, label in enumerate(unique_groups):
            mask = subset_groups == label
            if not mask.any():
                continue
            members = indices[mask]

            # min/max rather than first/last: indices arrive in time
            # order, which need not be ascending sample order.
            left = members.min()
            width = members.max() - left + 1

            ax.add_patch(
                patches.Rectangle(
                    (left, fold - 0.3),
                    width, 0.6,
                    linewidth=1 if is_train else 2,
                    edgecolor='blue' if is_train else 'red',
                    facecolor=palette[pos] if is_train else 'lightcoral',
                    alpha=0.7 if is_train else 0.8,
                    label=f'{kind} Group {label}' if fold == 0 else ""
                )
            )

            ax.scatter(
                members, fold + jitter[mask],
                c='darkblue' if is_train else 'darkred',
                s=15, alpha=0.6,
                marker='o' if is_train else 's',
                label=f'{kind} points' if fold == 0 and not points_labelled else '',
                zorder=3,
            )
            points_labelled = True

    # Plot each fold (train first, then test, so the jitter stream is
    # consumed in a fixed order)
    for fold, (train_idx, test_idx) in enumerate(splits):
        _draw_subset(fold, train_idx, "Train")
        _draw_subset(fold, test_idx, "Test")

    # Format plot
    ax.set_ylim(-0.5, len(splits) - 0.5)
    ax.set_xlim(0, len(X))
    ax.set_xlabel('Sample Index')
    ax.set_ylabel('Fold')
    ax.set_title('Time Series Blocking Split Visualization\n'
                 'No mixing between subjects/groups')

    # Set y-ticks
    ax.set_yticks(range(len(splits)))
    ax.set_yticklabels([f'Fold {i}' for i in range(len(splits))])

    # Create comprehensive legend: border meaning, a blank separator row,
    # then one swatch per subject
    legend_elements = [
        Line2D([0], [0], color='blue', lw=3, alpha=0.7, label='Training Set (blue border)'),
        Line2D([0], [0], color='red', lw=3, alpha=0.8, label='Test Set (red border)'),
        Line2D([0], [0], color='white', lw=0, label=''),
    ]
    for pos, label in enumerate(unique_groups):
        legend_elements.append(
            patches.Patch(facecolor=palette[pos], alpha=0.7,
                          label=f'Subject/Group {label}')
        )

    # Create legend with two columns if many subjects
    ncol = 1 if len(unique_groups) <= 3 else 2
    ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.02, 0.5), ncol=ncol)

    fig.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches='tight')

    return fig
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
"""Functions & Classes"""
|
|
433
|
+
def main(args) -> int:
    """Run the TimeSeriesBlockingSplit demo on synthetic multi-subject data.

    Builds random features/labels with noisy, increasing timestamps,
    assigns samples to subjects in contiguous runs, logs the composition
    of every fold, and saves a visualization of the splits.

    Args:
        args: Parsed command line arguments (reads n_samples, n_subjects,
            n_splits and test_ratio)

    Returns:
        int: Exit status (always 0)
    """
    logger.info("Demonstrating TimeSeriesBlockingSplit functionality")

    # Synthetic data; the draw order (randn, randint, normal) is fixed so
    # the seeded output stays reproducible.
    np.random.seed(42)
    n_samples, n_subjects = args.n_samples, args.n_subjects

    X = np.random.randn(n_samples, 5)
    y = np.random.randint(0, 2, n_samples)
    timestamps = np.arange(n_samples) + np.random.normal(0, 0.1, n_samples)

    # Contiguous run of samples per subject; leftover samples (when
    # n_samples is not divisible) are assigned to the last subject.
    samples_per_subject = n_samples // n_subjects
    groups = np.repeat(range(n_subjects), samples_per_subject)
    n_missing = n_samples - len(groups)
    groups = np.pad(groups, (0, n_missing), mode='constant', constant_values=n_subjects - 1)

    logger.info(f"Generated test data: {n_samples} samples, {n_subjects} subjects")
    logger.info(f"Samples per subject: ~{samples_per_subject}")

    splitter = TimeSeriesBlockingSplit(n_splits=args.n_splits, test_ratio=args.test_ratio)

    logger.info("Blocking split configuration:")
    logger.info(f" Number of splits: {args.n_splits}")
    logger.info(f" Test ratio: {args.test_ratio}")

    # Report which subjects land in each fold's train and test sets
    folds = splitter.split(X, y, timestamps, groups)
    for fold, (train_idx, test_idx) in enumerate(folds):
        train_subjects = sorted(set(groups[train_idx]))
        test_subjects = sorted(set(groups[test_idx]))

        logger.info(f"Fold {fold}:")
        logger.info(f" Train: {len(train_idx)} samples from subjects {train_subjects}")
        logger.info(f" Test: {len(test_idx)} samples from subjects {test_subjects}")

        shared = set(train_subjects).intersection(test_subjects)
        if not shared:
            logger.info(" No subject overlap")
        else:
            logger.info(f" Subjects in both: {sorted(shared)} (temporal separation maintained)")

    logger.info("Generating blocking split visualization with scatter plots")
    fig = splitter.plot_splits(X, y, timestamps, groups)

    # Save through the SciTeX I/O layer, then release the figure
    stx.io.save(fig, "./blocking_splits_demo.png", symlink_from_cwd=True)
    plt.close(fig)

    logger.info("TimeSeriesBlockingSplit demonstration completed successfully")
    return 0
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def parse_args() -> argparse.Namespace:
    """Build the demo's argument parser and parse ``sys.argv``."""
    # (flag, type, default, help) for every supported option
    option_specs = (
        ("--n-samples", int, 300, "Total number of samples (default: %(default)s)"),
        ("--n-subjects", int, 4, "Number of subjects/groups (default: %(default)s)"),
        ("--n-splits", int, 3, "Number of CV splits (default: %(default)s)"),
        ("--test-ratio", float, 0.3, "Proportion of data for test per subject (default: %(default)s)"),
    )

    parser = argparse.ArgumentParser(
        description='Demonstrate TimeSeriesBlockingSplit for multi-subject time series'
    )
    for flag, caster, fallback, help_text in option_specs:
        parser.add_argument(flag, type=caster, default=fallback, help=help_text)

    return parser.parse_args()
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def run_main() -> None:
    """Start a scitex session, run ``main`` with the parsed arguments, then close the session."""
    # The session hands back replacements for these names; they are
    # published at module level so ``main`` sees them.
    global CONFIG, CC, sys, plt, rng

    import sys
    import matplotlib.pyplot as plt
    import scitex as stx

    args = parse_args()

    session = stx.session.start(
        sys,
        plt,
        args=args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = session

    exit_status = main(args)

    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=exit_status,
    )
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
if __name__ == '__main__':
|
|
566
|
+
run_main()
|
|
567
|
+
|
|
568
|
+
# EOF
|