scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,688 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-22 17:15:00 (ywatanabe)"
|
|
4
|
+
# File: _TimeSeriesCalendarSplit.py
|
|
5
|
+
|
|
6
|
+
__FILE__ = "_TimeSeriesCalendarSplit.py"
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
Functionalities:
|
|
10
|
+
- Implements calendar-based time series cross-validation
|
|
11
|
+
- Splits data based on calendar intervals (monthly, weekly, daily)
|
|
12
|
+
- Ensures temporal order preservation with no data leakage
|
|
13
|
+
- Supports flexible interval definitions (D, W, M, Q, Y)
|
|
14
|
+
- Provides visualization with scatter plots showing actual data points
|
|
15
|
+
- Useful for financial data, sales forecasting, seasonal patterns
|
|
16
|
+
|
|
17
|
+
Dependencies:
|
|
18
|
+
- packages:
|
|
19
|
+
- numpy
|
|
20
|
+
- pandas
|
|
21
|
+
- sklearn
|
|
22
|
+
- matplotlib
|
|
23
|
+
- scitex
|
|
24
|
+
|
|
25
|
+
IO:
|
|
26
|
+
- input-files:
|
|
27
|
+
- None (generates synthetic calendar-based data for demonstration)
|
|
28
|
+
- output-files:
|
|
29
|
+
- ./calendar_splits_demo.png (visualization with scatter plots)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
"""Imports"""
|
|
33
|
+
import os
|
|
34
|
+
import sys
|
|
35
|
+
import argparse
|
|
36
|
+
import numpy as np
|
|
37
|
+
import pandas as pd
|
|
38
|
+
from typing import Iterator, Optional, Tuple, Union, Literal
|
|
39
|
+
from sklearn.model_selection import BaseCrossValidator
|
|
40
|
+
from sklearn.utils.validation import _num_samples
|
|
41
|
+
import matplotlib.pyplot as plt
|
|
42
|
+
import matplotlib.patches as patches
|
|
43
|
+
import scitex as stx
|
|
44
|
+
from scitex import logging
|
|
45
|
+
|
|
46
|
+
# Import timestamp normalizer (internally uses to_datetime helper)
|
|
47
|
+
from ._normalize_timestamp import normalize_timestamp, to_datetime
|
|
48
|
+
|
|
49
|
+
logger = logging.getLogger(__name__)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class TimeSeriesCalendarSplit(BaseCrossValidator):
    """
    Calendar-based time series cross-validation splitter.

    Samples are bucketed into calendar periods (pandas periods of frequency
    ``interval``). Each fold uses ``n_train_intervals`` consecutive periods
    for training, optionally ``n_val_intervals`` periods for validation
    (``split_with_val`` only), skips ``gap_intervals`` periods and uses the
    following ``n_test_intervals`` periods for testing.

    Only periods that contain at least one sample are counted, so a stretch
    of the calendar without data does not consume an interval.

    Parameters
    ----------
    interval : str
        Time interval for splitting. Options:
        - 'D': Daily
        - 'W': Weekly
        - 'M': Monthly
        - 'Q': Quarterly
        - 'Y': Yearly
        Or any pandas frequency string
    n_train_intervals : int
        Number of intervals to use for training (default: 12)
    n_test_intervals : int
        Number of intervals to use for testing (default: 1)
    n_val_intervals : int
        Number of intervals to use for validation in ``split_with_val``
        (default: 0). Ignored by ``split`` and ``get_n_splits``.
    gap_intervals : int
        Number of intervals to skip before the test set (default: 0)
    step_intervals : int
        Number of intervals to step forward for next fold (default: 1).
        Must be at least 1.
    random_state : int, optional
        Seed used to build the ``rng`` attribute
        (``np.random.default_rng(random_state)``).

    Examples
    --------
    >>> from scitex.ml.classification import TimeSeriesCalendarSplit
    >>> import pandas as pd
    >>> import numpy as np
    >>>
    >>> # Create sample data with daily timestamps
    >>> dates = pd.date_range('2023-01-01', '2023-12-31', freq='D')
    >>> X = np.random.randn(len(dates), 10)
    >>> y = np.random.randint(0, 2, len(dates))
    >>>
    >>> # Monthly splits: 6 months train, 1 month test
    >>> tscal = TimeSeriesCalendarSplit(interval='M', n_train_intervals=6)
    >>> for train_idx, test_idx in tscal.split(X, y, timestamps=dates):
    ...     print(f"Train: {dates[train_idx[0]]:%Y-%m} to {dates[train_idx[-1]]:%Y-%m}")
    ...     print(f"Test: {dates[test_idx[0]]:%Y-%m} to {dates[test_idx[-1]]:%Y-%m}")
    """

    def __init__(
        self,
        interval: str = 'M',
        n_train_intervals: int = 12,
        n_test_intervals: int = 1,
        n_val_intervals: int = 0,
        gap_intervals: int = 0,
        step_intervals: int = 1,
        random_state: Optional[int] = None,
    ):
        self.interval = interval
        self.n_train_intervals = n_train_intervals
        self.n_test_intervals = n_test_intervals
        self.n_val_intervals = n_val_intervals
        self.gap_intervals = gap_intervals
        self.step_intervals = step_intervals
        self.random_state = random_state
        self.rng = np.random.default_rng(random_state)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _validate_interval_counts(self) -> None:
        """Raise ValueError for interval counts that make folds ill-defined."""
        # A non-positive step never advances the fold window (infinite loop
        # in the generators, division by zero in get_n_splits).
        if self.step_intervals < 1:
            raise ValueError(
                f"step_intervals must be >= 1, got {self.step_intervals}"
            )
        # Negative counts would produce overlapping or reversed slices.
        for name in (
            'n_train_intervals',
            'n_test_intervals',
            'n_val_intervals',
            'gap_intervals',
        ):
            value = getattr(self, name)
            if value < 0:
                raise ValueError(f"{name} must be >= 0, got {value}")

    @staticmethod
    def _to_datetime_index(timestamps) -> pd.DatetimeIndex:
        """Return ``timestamps`` as a ``pd.DatetimeIndex``.

        A ``pd.DatetimeIndex`` is returned unchanged. Any other iterable is
        converted element by element with ``to_datetime``; timezone info is
        dropped (not converted) from each resulting datetime.
        """
        if isinstance(timestamps, pd.DatetimeIndex):
            return timestamps

        naive_datetimes = []
        for ts in timestamps:
            dt = to_datetime(ts)
            # Remove timezone info for pandas compatibility
            if dt.tzinfo is not None:
                dt = dt.replace(tzinfo=None)
            naive_datetimes.append(dt)
        return pd.DatetimeIndex(naive_datetimes)

    def _build_interval_frame(self, X, timestamps):
        """Return ``(frame, unique_intervals)`` for the given samples.

        ``frame`` has one row per sample, sorted by timestamp, with columns
        'index' (original position), 'timestamp' and 'interval' (the period
        each sample falls in). ``unique_intervals`` lists the observed
        periods in chronological order.
        """
        frame = pd.DataFrame({
            'index': np.arange(_num_samples(X)),
            'timestamp': self._to_datetime_index(timestamps),
        })
        # Stable sort: samples sharing a timestamp keep their input order,
        # which makes the yielded index arrays deterministic.
        frame = frame.sort_values('timestamp', kind='mergesort')
        frame['interval'] = frame['timestamp'].dt.to_period(self.interval)
        return frame, frame['interval'].unique()

    @staticmethod
    def _indices_in(frame, intervals) -> np.ndarray:
        """Return original sample positions whose period is in ``intervals``."""
        if len(intervals) == 0:
            # Integer dtype so the result is always usable as an index array.
            return np.array([], dtype=int)
        return frame.loc[frame['interval'].isin(intervals), 'index'].values

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def split(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray] = None,
        timestamps: Optional[Union[np.ndarray, pd.DatetimeIndex]] = None,
        groups: Optional[np.ndarray] = None,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """
        Generate calendar-based train/test splits.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,), optional
            Target variable
        timestamps : array-like or pd.DatetimeIndex, shape (n_samples,)
            Timestamps for each sample (required)
        groups : array-like, shape (n_samples,), optional
            Group labels (not used in this splitter)

        Yields
        ------
        train : ndarray
            Training set indices, ordered by timestamp
        test : ndarray
            Test set indices, ordered by timestamp

        Raises
        ------
        ValueError
            If ``timestamps`` is None, ``step_intervals`` is below 1, or any
            other interval count is negative.
        """
        if timestamps is None:
            raise ValueError("timestamps must be provided for calendar-based splitting")
        self._validate_interval_counts()

        frame, unique_intervals = self._build_interval_frame(X, timestamps)
        n_intervals = len(unique_intervals)

        # Total intervals consumed by one fold
        intervals_per_fold = (
            self.n_train_intervals +
            self.gap_intervals +
            self.n_test_intervals
        )

        start_idx = 0
        # The loop condition guarantees the test window fits in the data.
        while start_idx + intervals_per_fold <= n_intervals:
            train_end = start_idx + self.n_train_intervals
            test_start = train_end + self.gap_intervals
            test_end = test_start + self.n_test_intervals

            train_indices = self._indices_in(
                frame, unique_intervals[start_idx:train_end]
            )
            test_indices = self._indices_in(
                frame, unique_intervals[test_start:test_end]
            )

            yield train_indices, test_indices

            # Move to next fold
            start_idx += self.step_intervals

    def split_with_val(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray] = None,
        timestamps: Optional[Union[np.ndarray, pd.DatetimeIndex]] = None,
        groups: Optional[np.ndarray] = None,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """
        Generate calendar-based train/validation/test splits.

        The validation set comes after training but before test, maintaining
        temporal order: train < val < test. The gap is placed between the
        validation and test sets.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,), optional
            Target variable
        timestamps : array-like or pd.DatetimeIndex, shape (n_samples,)
            Timestamps for each sample (required)
        groups : array-like, shape (n_samples,), optional
            Group labels (not used in this splitter)

        Yields
        ------
        train : ndarray
            Training set indices
        val : ndarray
            Validation set indices (empty integer array when
            ``n_val_intervals`` is 0)
        test : ndarray
            Test set indices

        Raises
        ------
        ValueError
            If ``timestamps`` is None, ``step_intervals`` is below 1, or any
            other interval count is negative.
        """
        if timestamps is None:
            raise ValueError("timestamps must be provided for calendar-based splitting")
        self._validate_interval_counts()

        frame, unique_intervals = self._build_interval_frame(X, timestamps)
        n_intervals = len(unique_intervals)

        # Total intervals consumed by one fold, including validation
        intervals_per_fold = (
            self.n_train_intervals +
            self.n_val_intervals +
            self.gap_intervals +
            self.n_test_intervals
        )

        start_idx = 0
        while start_idx + intervals_per_fold <= n_intervals:
            train_end = start_idx + self.n_train_intervals
            # With n_val_intervals == 0 the validation window is empty and
            # val_end == train_end, so the test window follows train + gap.
            val_end = train_end + self.n_val_intervals
            test_start = val_end + self.gap_intervals
            test_end = test_start + self.n_test_intervals

            train_indices = self._indices_in(
                frame, unique_intervals[start_idx:train_end]
            )
            val_indices = self._indices_in(
                frame, unique_intervals[train_end:val_end]
            )
            test_indices = self._indices_in(
                frame, unique_intervals[test_start:test_end]
            )

            yield train_indices, val_indices, test_indices

            # Move to next fold
            start_idx += self.step_intervals

    def get_n_splits(self, X=None, y=None, timestamps=None, groups=None):
        """
        Calculate the number of folds ``split`` yields for ``timestamps``.

        ``n_val_intervals`` is not taken into account, so ``split_with_val``
        may yield fewer folds than the value returned here.

        Parameters
        ----------
        X : array-like, optional
            Not used directly
        y : array-like, optional
            Not used
        timestamps : array-like or pd.DatetimeIndex, optional
            Timestamps to determine number of possible splits
        groups : array-like, optional
            Not used; accepted for parity with ``split``

        Returns
        -------
        n_splits : int
            Number of splits. Returns -1 if timestamps is None.

        Raises
        ------
        ValueError
            If ``step_intervals`` is below 1 or any other interval count is
            negative (only checked when ``timestamps`` is given).
        """
        if timestamps is None:
            return -1  # Can't determine without timestamps
        self._validate_interval_counts()

        timestamps = self._to_datetime_index(timestamps)

        # Count unique intervals
        n_intervals = len(timestamps.to_period(self.interval).unique())

        # Intervals consumed by one complete fold
        intervals_per_fold = (
            self.n_train_intervals +
            self.gap_intervals +
            self.n_test_intervals
        )

        if n_intervals < intervals_per_fold:
            return 0

        # Number of window positions reachable with the configured step
        return (n_intervals - intervals_per_fold) // self.step_intervals + 1

    @staticmethod
    def _draw_partition(
        ax, timestamps, idx, fold, labels, jitter_strength, *,
        name, edgecolor, facecolor, class0_color, other_color, marker,
    ) -> None:
        """Draw one partition (range rectangle plus jittered points) of a fold."""
        if len(idx) == 0:
            # Nothing to draw for an empty partition.
            return

        # idx is ordered by timestamp, so its ends delimit the covered range.
        span_start = timestamps[idx[0]]
        span_end = timestamps[idx[-1]]

        rect = patches.Rectangle(
            (span_start, fold - 0.3),
            span_end - span_start, 0.6,
            linewidth=1, edgecolor=edgecolor, facecolor=facecolor, alpha=0.3,
            label=f'{name} Set (range)' if fold == 0 else ""
        )
        ax.add_patch(rect)

        # Vertical jitter keeps overlapping samples visible.
        jitter = np.random.normal(0, jitter_strength, len(idx))

        # Color by class if labels are provided
        if labels is not None:
            colors = [
                class0_color if yi == 0 else other_color for yi in labels[idx]
            ]
        else:
            colors = class0_color

        ax.scatter(timestamps[idx], fold + jitter,
                   c=colors, s=20, alpha=0.6, marker=marker,
                   label=f'{name} data points' if fold == 0 else "", zorder=10)

    def plot_splits(self, X, y=None, timestamps=None, figsize=(12, 6), save_path=None):
        """
        Visualize the train/test splits as timeline rectangles with scatter plots.

        Parameters
        ----------
        X : array-like
            Training data (used to determine data size)
        y : array-like, optional
            Target variable (used for color-coding scatter points: one color
            for samples equal to 0, another for everything else)
        timestamps : array-like or pd.DatetimeIndex
            Timestamps for each sample
        figsize : tuple, default (12, 6)
            Figure size (width, height)
        save_path : str, optional
            Path to save the plot

        Returns
        -------
        fig : matplotlib.figure.Figure
            The created figure

        Raises
        ------
        ValueError
            If ``timestamps`` is None or no split can be generated.

        Examples
        --------
        >>> splitter = TimeSeriesCalendarSplit(interval='M', n_train_intervals=6)
        >>> fig = splitter.plot_splits(X, timestamps=dates)
        >>> fig.savefig('calendar_splits.png')
        """
        if timestamps is None:
            raise ValueError("timestamps must be provided for calendar split visualization")

        # Get all splits
        splits = list(self.split(X, y, timestamps))
        if not splits:
            raise ValueError("No splits generated. Check data size and splitter parameters.")

        # Convert timestamps for plotting
        timestamps = self._to_datetime_index(timestamps)

        # Positional indexing below must work for lists and pandas objects.
        labels = None if y is None else np.asarray(y)

        # Create figure
        fig, ax = plt.subplots(figsize=figsize)

        # Jitter strength for scatter plots
        jitter_strength = 0.15

        # Plot each fold on its own row
        for fold, (train_idx, test_idx) in enumerate(splits):
            self._draw_partition(
                ax, timestamps, train_idx, fold, labels, jitter_strength,
                name='Train', edgecolor='blue', facecolor='lightblue',
                class0_color='darkblue', other_color='navy', marker='o',
            )
            self._draw_partition(
                ax, timestamps, test_idx, fold, labels, jitter_strength,
                name='Test', edgecolor='red', facecolor='lightcoral',
                class0_color='darkred', other_color='firebrick', marker='^',
            )

        # Format plot
        ax.set_ylim(-0.5, len(splits) - 0.5)
        ax.set_xlim(timestamps.min(), timestamps.max())
        ax.set_xlabel('Time')
        ax.set_ylabel('Fold')
        # Real newline: the configuration summary goes on a second title line.
        ax.set_title(f'Time Series Calendar Split Visualization\n'
                     f'Interval: {self.interval}, Train: {self.n_train_intervals}, '
                     f'Test: {self.n_test_intervals}')

        # Set y-ticks
        ax.set_yticks(range(len(splits)))
        ax.set_yticklabels([f'Fold {i}' for i in range(len(splits))])

        # Add legend
        ax.legend(loc='upper right')

        # Format x-axis
        ax.tick_params(axis='x', rotation=45)

        plt.tight_layout()

        if save_path:
            fig.savefig(save_path, dpi=150, bbox_inches='tight')

        return fig
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
"""Functions & Classes"""
|
|
513
|
+
def main(args) -> int:
    """Run the TimeSeriesCalendarSplit demonstration on synthetic data.

    Generates ``args.n_samples`` random samples on a regular time grid,
    logs up to ``args.max_folds`` folds together with a temporal-order
    check, then plots the splits and saves the figure to
    ``./calendar_splits_demo.png``.

    Args:
        args: Parsed command line arguments (see ``parse_args``).

    Returns:
        int: Exit status (always 0 when the function returns).
    """
    logger.info("Demonstrating TimeSeriesCalendarSplit functionality")

    date_fmt = '%Y-%m-%d'

    # Synthetic data; the global seed fixes the features and labels.
    np.random.seed(42)
    sample_count = args.n_samples
    timestamps = pd.date_range(
        start=pd.Timestamp(args.start_date),
        periods=sample_count,
        freq=args.data_freq,
    )
    X = np.random.randn(sample_count, 5)
    y = np.random.randint(0, 2, sample_count)

    first_day = timestamps[0].strftime(date_fmt)
    last_day = timestamps[-1].strftime(date_fmt)
    logger.info(f"Generated test data: {sample_count} samples")
    logger.info(f"Date range: {first_day} to {last_day}")
    logger.info(f"Data frequency: {args.data_freq}")

    # Splitter configured straight from the command line.
    splitter = TimeSeriesCalendarSplit(
        interval=args.interval,
        n_train_intervals=args.n_train_intervals,
        n_test_intervals=args.n_test_intervals,
        gap_intervals=args.gap_intervals,
        step_intervals=args.step_intervals
    )

    logger.info("Calendar split configuration:")
    configuration = (
        ("Interval", args.interval),
        ("Train intervals", args.n_train_intervals),
        ("Test intervals", args.n_test_intervals),
        ("Gap intervals", args.gap_intervals),
        ("Step intervals", args.step_intervals),
    )
    for label, value in configuration:
        logger.info(f" {label}: {value}")

    # Report the leading folds only.
    fold_iter = enumerate(splitter.split(X, y, timestamps=timestamps))
    for fold, (train_idx, test_idx) in fold_iter:
        if fold >= args.max_folds:
            break

        train_times = timestamps[train_idx]
        test_times = timestamps[test_idx]

        train_from = train_times[0].strftime(date_fmt)
        train_to = train_times[-1].strftime(date_fmt)
        test_from = test_times[0].strftime(date_fmt)
        test_to = test_times[-1].strftime(date_fmt)

        logger.info(f"Fold {fold}:")
        logger.info(f" Train: {train_from} to {train_to} ({len(train_idx)} samples)")
        logger.info(f" Test: {test_from} to {test_to} ({len(test_idx)} samples)")

        # Every training sample must precede every test sample.
        if train_times.max() < test_times.min():
            status = "✓"
        else:
            status = "✗"
        logger.info(f" Temporal order: {status}")

    # Visualization
    logger.info("Generating calendar split visualization")
    fig = splitter.plot_splits(X, y, timestamps)

    # Save using SciTeX framework
    stx.io.save(fig, "./calendar_splits_demo.png", symlink_from_cwd=True)
    plt.close(fig)

    logger.info("TimeSeriesCalendarSplit demonstration completed successfully")
    return 0
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def parse_args(argv: Optional[list] = None) -> argparse.Namespace:
    """Parse command line arguments for the calendar split demonstration.

    Args:
        argv: Argument strings to parse. When None (the default), argparse
            reads the process command line, which is the behavior existing
            callers rely on.

    Returns:
        argparse.Namespace: Parsed options ``n_samples``, ``start_date``,
        ``data_freq``, ``interval``, ``n_train_intervals``,
        ``n_test_intervals``, ``gap_intervals``, ``step_intervals`` and
        ``max_folds``.
    """
    parser = argparse.ArgumentParser(
        description='Demonstrate TimeSeriesCalendarSplit with calendar-based intervals'
    )

    # (flag, type, default, help) for every supported option.
    option_specs = (
        ("--n-samples", int, 365,
         "Number of samples to generate (default: %(default)s)"),
        ("--start-date", str, "2023-01-01",
         "Start date for time series (default: %(default)s)"),
        ("--data-freq", str, "D",
         "Frequency of data points (D=daily, H=hourly) (default: %(default)s)"),
        ("--interval", str, "M",
         "Calendar interval (D=daily, W=weekly, M=monthly) (default: %(default)s)"),
        ("--n-train-intervals", int, 6,
         "Number of intervals for training (default: %(default)s)"),
        ("--n-test-intervals", int, 1,
         "Number of intervals for testing (default: %(default)s)"),
        ("--gap-intervals", int, 0,
         "Gap intervals between train and test (default: %(default)s)"),
        ("--step-intervals", int, 1,
         "Step intervals between folds (default: %(default)s)"),
        ("--max-folds", int, 3,
         "Maximum number of folds to demonstrate (default: %(default)s)"),
    )
    for flag, value_type, default, help_text in option_specs:
        parser.add_argument(flag, type=value_type, default=default, help=help_text)

    return parser.parse_args(argv)
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def run_main() -> None:
    """Start a scitex session, run ``main`` and close the session.

    The objects returned by ``stx.session.start`` are bound to module-level
    names (``CONFIG``, ``CC``, ``plt``, ``rng``) and to ``sys.stdout`` /
    ``sys.stderr``. The status returned by ``main`` is forwarded to
    ``stx.session.close``.
    """
    global CONFIG, CC, sys, plt, rng

    import sys
    import matplotlib.pyplot as plt
    import scitex as stx

    cli_args = parse_args()

    # Session start hands back the configuration, the stream replacements,
    # the pyplot module to use, plus CC and rng.
    session = stx.session.start(
        sys,
        plt,
        args=cli_args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = session

    status = main(cli_args)

    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=status,
    )
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
if __name__ == '__main__':
|
|
686
|
+
run_main()
|
|
687
|
+
|
|
688
|
+
# EOF
|