scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-21 20:45:00 (ywatanabe)"
|
|
4
|
+
# File: _TimeSeriesStrategy.py
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
Time series cross-validation strategy enumeration.
|
|
8
|
+
|
|
9
|
+
Defines available strategies for time series CV.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from enum import Enum
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TimeSeriesStrategy(Enum):
    """
    Available time series CV strategies.

    Attributes
    ----------
    STRATIFIED : str
        Single time series with class balance preservation
    BLOCKING : str
        Multiple independent time series (e.g., different patients)
    SLIDING : str
        Sliding window approach with fixed-size windows
    EXPANDING : str
        Expanding window where training set grows over time
    FIXED : str
        Fixed train/test split at specific time point
    """

    STRATIFIED = "stratified"  # Single time series with class balance
    BLOCKING = "blocking"  # Multiple time series (e.g., patients)
    SLIDING = "sliding"  # Sliding window approach
    EXPANDING = "expanding"  # Expanding window (train grows)
    FIXED = "fixed"  # Fixed train/test split

    @classmethod
    def from_string(cls, value: str) -> 'TimeSeriesStrategy':
        """
        Create strategy from string value.

        Matching is case-insensitive and ignores surrounding whitespace.
        An existing ``TimeSeriesStrategy`` member is returned unchanged.

        Parameters
        ----------
        value : str
            String representation of strategy

        Returns
        -------
        TimeSeriesStrategy
            Corresponding enum value

        Raises
        ------
        ValueError
            If value doesn't match any strategy (including non-string input)
        """
        if isinstance(value, cls):
            return value

        if isinstance(value, str):
            try:
                # Enum value lookup replaces the manual scan over members.
                return cls(value.strip().lower())
            except ValueError:
                pass  # fall through to the descriptive error below

        raise ValueError(
            f"Unknown strategy: {value}. "
            f"Valid options are: {[s.value for s in cls]}"
        )

    def get_description(self) -> str:
        """
        Get human-readable description of the strategy.

        Returns
        -------
        str
            Description of the strategy
        """
        # Look members up on the class; reaching one member through another
        # (``self.STRATIFIED``) is discouraged by the enum documentation.
        strategy = type(self)
        descriptions = {
            strategy.STRATIFIED: "Maintains class balance while respecting time order",
            strategy.BLOCKING: "Handles multiple independent time series",
            strategy.SLIDING: "Uses fixed-size sliding windows through time",
            strategy.EXPANDING: "Training set expands while test moves forward",
            strategy.FIXED: "Single fixed split at specific time point",
        }
        return descriptions.get(self, "Unknown strategy")
|
|
@@ -0,0 +1,610 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-22 16:50:00 (ywatanabe)"
|
|
4
|
+
# File: _TimeSeriesStratifiedSplit.py
|
|
5
|
+
|
|
6
|
+
__FILE__ = "_TimeSeriesStratifiedSplit.py"
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
Functionalities:
|
|
10
|
+
- Implements time series cross-validation with stratification support
|
|
11
|
+
- Ensures chronological order (test data always after training data)
|
|
12
|
+
- Supports optional validation set between train and test
|
|
13
|
+
- Maintains temporal gaps to prevent data leakage
|
|
14
|
+
- Provides visualization with scatter plots for verification
|
|
15
|
+
- Validates temporal integrity in all splits
|
|
16
|
+
|
|
17
|
+
Dependencies:
|
|
18
|
+
- packages:
|
|
19
|
+
- numpy
|
|
20
|
+
- sklearn
|
|
21
|
+
- matplotlib
|
|
22
|
+
|
|
23
|
+
IO:
|
|
24
|
+
- input-files:
|
|
25
|
+
- None (generates synthetic data for demonstration)
|
|
26
|
+
- output-files:
|
|
27
|
+
- ./stratified_splits_demo.png (visualization)
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
"""Imports"""
|
|
31
|
+
import os
|
|
32
|
+
import sys
|
|
33
|
+
import argparse
|
|
34
|
+
import numpy as np
|
|
35
|
+
from typing import Iterator, Optional, Tuple
|
|
36
|
+
from sklearn.model_selection import BaseCrossValidator
|
|
37
|
+
from sklearn.utils.validation import _num_samples
|
|
38
|
+
import matplotlib.pyplot as plt
|
|
39
|
+
import matplotlib.patches as patches
|
|
40
|
+
import scitex as stx
|
|
41
|
+
from scitex import logging
|
|
42
|
+
|
|
43
|
+
logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class TimeSeriesStratifiedSplit(BaseCrossValidator):
    """
    Time series cross-validation with stratification support.

    This splitter ensures:
    1. Test data is always chronologically after training data
    2. Optional validation set between train and test
    3. Gap period between train and test to avoid leakage

    Temporal integrity is prioritized over class balance: ``split`` and
    ``split_with_val`` slice the timestamp-sorted samples and never
    resample them, so ``stratify`` and ``random_state`` do not change the
    yielded indices (``stratify`` only affects the title drawn by
    ``plot_splits``).

    Parameters
    ----------
    n_splits : int
        Number of splits (folds)
    test_ratio : float
        Proportion of data for test set (default: 0.2)
    val_ratio : float
        Proportion of data for validation set (default: 0.1)
    gap : int
        Number of samples to exclude between train and test (default: 0)
    stratify : bool
        Whether to maintain class proportions (default: True)
    random_state : int, optional
        Random seed for reproducibility (default: None)

    Examples
    --------
    >>> from scitex.ml.classification import TimeSeriesStratifiedSplit
    >>> import numpy as np
    >>>
    >>> X = np.random.randn(100, 10)
    >>> y = np.random.randint(0, 2, 100)
    >>> timestamps = np.arange(100)
    >>>
    >>> tscv = TimeSeriesStratifiedSplit(n_splits=3)
    >>> for train_idx, test_idx in tscv.split(X, y, timestamps):
    ...     print(f"Train: {len(train_idx)}, Test: {len(test_idx)}")
    """

    # Drawing style per subset used by ``plot_splits``: legend labels,
    # rectangle colours, scatter colour and scatter marker.
    _SUBSET_STYLES = {
        "train": {
            "range_label": 'Train', "edge": 'blue', "face": 'lightblue',
            "point_label": 'Train points', "point": 'darkblue', "marker": 'o',
        },
        "val": {
            "range_label": 'Validation', "edge": 'green', "face": 'lightgreen',
            "point_label": 'Val points', "point": 'darkgreen', "marker": '^',
        },
        "test": {
            "range_label": 'Test', "edge": 'red', "face": 'lightcoral',
            "point_label": 'Test points', "point": 'darkred', "marker": 's',
        },
    }

    def __init__(
        self,
        n_splits: int = 5,
        test_ratio: float = 0.2,
        val_ratio: float = 0.1,
        gap: int = 0,
        stratify: bool = True,
        random_state: Optional[int] = None,
    ):
        self.n_splits = n_splits
        self.test_ratio = test_ratio
        self.val_ratio = val_ratio
        self.gap = gap
        self.stratify = stratify
        self.random_state = random_state
        # Kept for API compatibility; the split methods do not draw from it.
        self.rng = np.random.default_rng(random_state)

    def _time_sorted_indices(self, X, timestamps) -> Tuple[int, np.ndarray]:
        """Return ``(n_samples, sample indices ordered by ascending timestamp)``.

        Raises
        ------
        ValueError
            If ``timestamps`` is None, or is not 1-D with one entry per sample
        """
        if timestamps is None:
            raise ValueError("timestamps must be provided for time series split")

        n_samples = _num_samples(X)
        timestamps = np.asarray(timestamps)
        if timestamps.ndim != 1 or timestamps.shape[0] != n_samples:
            raise ValueError(
                "timestamps must be 1-D with one entry per sample; "
                f"got shape {timestamps.shape} for {n_samples} samples"
            )

        # A stable sort keeps samples with equal timestamps in input order.
        return n_samples, np.argsort(timestamps, kind="stable")

    def split(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray] = None,
        timestamps: Optional[np.ndarray] = None,
        groups: Optional[np.ndarray] = None,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """
        Generate indices to split data into training and test sets.

        Folds use an expanding training window: each fold trains on the
        earliest samples and tests on the block that follows after ``gap``
        samples. ``y`` and ``groups`` are accepted for API compatibility
        and are not used.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,)
            Target variable
        timestamps : array-like, shape (n_samples,)
            Timestamps for temporal ordering (required)
        groups : array-like, shape (n_samples,), optional
            Group labels for grouped CV

        Yields
        ------
        train : ndarray
            Training set indices
        test : ndarray
            Test set indices

        Raises
        ------
        ValueError
            If ``timestamps`` is missing or does not match ``X`` in length
        """
        n_samples, sorted_indices = self._time_sorted_indices(X, timestamps)
        test_size = int(n_samples * self.test_ratio)

        for fold in range(self.n_splits):
            # Later folds push the train/test boundary towards the end.
            train_end = n_samples - test_size - self.gap
            train_end -= (self.n_splits - fold - 1) * (test_size // self.n_splits)
            train_end = max(test_size, train_end)  # Ensure min training size

            test_start = train_end + self.gap
            test_end = min(test_start + test_size, n_samples)

            yield sorted_indices[:train_end], sorted_indices[test_start:test_end]

    def split_with_val(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray] = None,
        timestamps: Optional[np.ndarray] = None,
        groups: Optional[np.ndarray] = None,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """
        Generate indices with separate validation set.

        The layout in time order is train, gap, validation, gap, test.
        Fold 0 uses the final test window and each later fold shifts the
        windows earlier. A fold whose train, validation or test window
        would be empty is skipped, so fewer than ``n_splits`` folds may be
        yielded (none at all when ``val_ratio`` is 0). ``y`` and ``groups``
        are accepted for API compatibility and are not used.

        Yields
        ------
        train : ndarray
            Training set indices
        val : ndarray
            Validation set indices
        test : ndarray
            Test set indices

        Raises
        ------
        ValueError
            If ``timestamps`` is missing or does not match ``X`` in length
        """
        n_samples, sorted_indices = self._time_sorted_indices(X, timestamps)
        test_size = int(n_samples * self.test_ratio)
        val_size = int(n_samples * self.val_ratio) if self.val_ratio > 0 else 0

        for fold in range(self.n_splits):
            # Work backwards from the end of the sorted sequence.
            test_start = n_samples - test_size - fold * (test_size // self.n_splits)
            test_end = min(test_start + test_size, n_samples)

            val_end = test_start - self.gap
            val_start = max(0, val_end - val_size)

            # Training always starts at position 0 (expanding window).
            train_end = val_start - self.gap

            if train_end <= 0 or val_start >= val_end or test_start >= test_end:
                continue

            yield (
                sorted_indices[:train_end],
                sorted_indices[val_start:val_end],
                sorted_indices[test_start:test_end],
            )

    def _stratify_indices_temporal(
        self, indices: np.ndarray, y: np.ndarray, target_size: int
    ) -> np.ndarray:
        """Subsample ``indices`` to ``target_size`` while keeping their order.

        Each class receives a quota proportional to its share of
        ``indices`` (at least one sample per class); the earliest samples
        of each class are kept. Not called by ``split``/``split_with_val``.

        Returns
        -------
        ndarray
            ``indices`` unchanged when ``target_size`` is not smaller than
            its length, an empty array when ``target_size <= 0``, otherwise
            at most ``target_size`` selected indices in their input order.
        """
        if target_size >= len(indices):
            return indices

        indices = np.asarray(indices)
        if target_size <= 0:
            return indices[:0]

        labels = np.asarray(y)[indices]
        classes, counts = np.unique(labels, return_counts=True)

        # Proportional quota per class, at least 1 and never above its count.
        proportions = counts / len(indices)
        quotas = np.minimum(
            np.maximum(1, (target_size * proportions).astype(int)), counts
        )

        # Hand samples lost to rounding to the largest classes first.
        remaining = target_size - int(quotas.sum())
        if remaining > 0:
            for pos in np.argsort(-counts, kind="stable"):
                if remaining <= 0:
                    break
                if quotas[pos] < counts[pos]:
                    quotas[pos] += 1
                    remaining -= 1

        quota_of = dict(zip(classes.tolist(), quotas.tolist()))
        taken = dict.fromkeys(quota_of, 0)
        selected = []
        for idx, label in zip(indices.tolist(), labels.tolist()):
            if taken[label] < quota_of[label]:
                selected.append(idx)
                taken[label] += 1
                # Quotas can sum above the target because of the minimum of 1.
                if len(selected) >= target_size:
                    break

        return np.array(selected, dtype=indices.dtype)

    def get_n_splits(self, X=None, y=None, groups=None):
        """Returns the number of splitting iterations in the CV."""
        return self.n_splits

    def _find_contiguous_segments(self, indices):
        """Return ``(start, end)`` pairs, inclusive, for each run of consecutive indices."""
        if len(indices) == 0:
            return []

        ordered = np.sort(indices)
        segments = []
        start = end = ordered[0]
        for value in ordered[1:]:
            if value != end + 1:
                segments.append((start, end))
                start = value
            end = value
        segments.append((start, end))
        return segments

    @staticmethod
    def _name_subsets(split_indices):
        """Pair the index arrays of one fold with their subset names."""
        if len(split_indices) == 3:
            names = ("train", "val", "test")
        else:
            names = ("train", "test")
        return list(zip(names, split_indices))

    def _draw_ranges(self, ax, fold, subsets):
        """Draw one rectangle per contiguous index run of each subset of a fold."""
        for name, idx in subsets:
            if len(idx) == 0:
                continue
            style = self._SUBSET_STYLES[name]
            segments = self._find_contiguous_segments(idx)
            first_start = segments[0][0]
            for start, end in segments:
                # Only the first rectangle of fold 0 carries a legend label.
                labelled = fold == 0 and start == first_start
                ax.add_patch(
                    patches.Rectangle(
                        (start, fold - 0.3),
                        end - start + 1, 0.6,
                        linewidth=1, edgecolor=style["edge"],
                        facecolor=style["face"], alpha=0.7,
                        label=style["range_label"] if labelled else "",
                    )
                )

    def _draw_points(self, ax, fold, subsets, jitter_rng, jitter_strength=0.15):
        """Scatter the samples of each subset of a fold with vertical jitter."""
        for name, idx in subsets:
            if len(idx) == 0:
                continue
            style = self._SUBSET_STYLES[name]
            jitter = jitter_rng.normal(0, jitter_strength, len(idx))
            ax.scatter(
                idx, fold + jitter,
                c=style["point"], s=15, alpha=0.6, marker=style["marker"],
                label=style["point_label"] if fold == 0 else '', zorder=3,
            )

    def plot_splits(self, X, y=None, timestamps=None, figsize=(12, 6), save_path=None):
        """
        Visualize the stratified time series splits.

        Shows train (blue), validation (green), and test (red) sets.
        When val_ratio=0, only shows train and test.

        Parameters
        ----------
        X : array-like
            Training data
        y : array-like, optional
            Target variable
        timestamps : array-like, optional
            Timestamps (if None, uses sample indices)
        figsize : tuple, default (12, 6)
            Figure size
        save_path : str, optional
            Path to save the plot

        Returns
        -------
        fig : matplotlib.figure.Figure
            The created figure

        Raises
        ------
        ValueError
            If the splitter yields no folds for this data
        """
        # Use sample indices if no timestamps provided
        if timestamps is None:
            timestamps = np.arange(len(X))

        if self.val_ratio > 0:
            splits = list(self.split_with_val(X, y, timestamps))
            split_type = "train-val-test"
        else:
            splits = list(self.split(X, y, timestamps))
            split_type = "train-test"

        # Check before creating the figure so a failure leaves none open.
        if not splits:
            raise ValueError("No splits generated")

        fig, ax = plt.subplots(figsize=figsize)
        folds = [self._name_subsets(split_indices) for split_indices in splits]

        # Rectangles first, then points, so the legend order stays
        # ranges-then-points.
        for fold, subsets in enumerate(folds):
            self._draw_ranges(ax, fold, subsets)

        # A private seeded generator gives reproducible jitter without
        # reseeding NumPy's global random state.
        jitter_rng = np.random.RandomState(42)
        for fold, subsets in enumerate(folds):
            self._draw_points(ax, fold, subsets, jitter_rng)

        # Format plot
        ax.set_ylim(-0.5, len(splits) - 0.5)
        ax.set_xlim(0, len(X))
        ax.set_xlabel('Sample Index (original order)')
        ax.set_ylabel('Fold')

        title = f'Time Series Stratified Split Visualization ({split_type})'
        if self.stratify:
            title += '\nMaintains class balance across splits'
        if self.gap > 0:
            title += f', Gap: {self.gap} samples'
        title += '\nRectangles show ranges, dots show actual data points'
        ax.set_title(title)

        ax.set_yticks(range(len(splits)))
        ax.set_yticklabels([f'Fold {i}' for i in range(len(splits))])

        ax.legend(loc='upper right')

        fig.tight_layout()

        if save_path:
            fig.savefig(save_path, dpi=150, bbox_inches='tight')

        return fig
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
"""Functions & Classes"""
|
|
508
|
+
def main(args) -> int:
    """Run the TimeSeriesStratifiedSplit demonstration on synthetic data.

    Builds a seeded random dataset, logs fold sizes for the two-way and
    three-way splits, checks chronological ordering of each three-way
    fold, and saves a visualization of the three-way splits.

    Args:
        args: Command line arguments (not read by this function)

    Returns:
        int: Exit status (always 0)
    """
    logger.info("Demonstrating TimeSeriesStratifiedSplit functionality")

    # Synthetic data: the order of the random draws is part of the demo output.
    np.random.seed(42)
    n_samples = 200
    X = np.random.randn(n_samples, 5)
    y = np.random.randint(0, 2, n_samples)
    timestamps = np.arange(n_samples) + np.random.normal(0, 0.1, n_samples)

    logger.info(f"Generated test data: {n_samples} samples, {X.shape[1]} features, {len(np.unique(y))} classes")

    # Two-way split
    logger.info("Testing regular train/test split")
    two_way = TimeSeriesStratifiedSplit(n_splits=3, test_ratio=0.2, gap=5)
    for fold, (train_idx, test_idx) in enumerate(two_way.split(X, y, timestamps)):
        logger.info(f"Fold {fold}: Train={len(train_idx)}, Test={len(test_idx)}")

    # Three-way split
    logger.info("Testing train/validation/test split")
    splitter_val = TimeSeriesStratifiedSplit(n_splits=2, test_ratio=0.2, val_ratio=0.15, gap=3)
    three_way_folds = splitter_val.split_with_val(X, y, timestamps)
    for fold, (train_idx, val_idx, test_idx) in enumerate(three_way_folds):
        logger.info(f"Fold {fold}: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}")

        # Every subset must end before the next one begins.
        train_times = timestamps[train_idx]
        val_times = timestamps[val_idx] if len(val_idx) > 0 else np.array([])
        test_times = timestamps[test_idx] if len(test_idx) > 0 else np.array([])

        has_val = len(val_times) > 0
        has_test = len(test_times) > 0
        if has_val and has_test:
            temporal_ok = (train_times.max() < val_times.min()) and (val_times.max() < test_times.min())
        elif has_test:
            temporal_ok = train_times.max() < test_times.min()
        else:
            temporal_ok = True

        status = "✓" if temporal_ok else "✗"
        logger.info(f" Temporal order: {status}")

    # Visualization, saved through the SciTeX framework
    logger.info("Generating split visualization")
    fig = splitter_val.plot_splits(X, y, timestamps)
    stx.io.save(fig, "./stratified_splits_demo.png", symlink_from_cwd=True)
    plt.close(fig)

    logger.info("TimeSeriesStratifiedSplit demonstration completed successfully")
    return 0
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def parse_args() -> argparse.Namespace:
    """Parse the command line; the demo defines no options of its own."""
    # The module-level ``argparse`` import (needed by the return annotation)
    # is used directly.
    return argparse.ArgumentParser(
        description='Demonstrate TimeSeriesStratifiedSplit with temporal integrity validation'
    ).parse_args()
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def run_main() -> None:
    """Start a scitex session, run ``main``, then close the session."""
    # The session rebinds these module-level names, so they are declared
    # global before the imports below assign them.
    global CONFIG, CC, sys, plt, rng

    import sys
    import matplotlib.pyplot as plt
    import scitex as stx

    args = parse_args()

    session = stx.session.start(
        sys,
        plt,
        args=args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )
    # Unpacked left to right: config, redirected streams, pyplot, CC, rng.
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = session

    status = main(args)

    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=status,
    )
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
if __name__ == '__main__':
|
|
608
|
+
run_main()
|
|
609
|
+
|
|
610
|
+
# EOF
|