scitex 2.14.0__py3-none-any.whl → 2.15.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +71 -17
- scitex/_env_loader.py +156 -0
- scitex/_mcp_resources/__init__.py +37 -0
- scitex/_mcp_resources/_cheatsheet.py +135 -0
- scitex/_mcp_resources/_figrecipe.py +138 -0
- scitex/_mcp_resources/_formats.py +102 -0
- scitex/_mcp_resources/_modules.py +337 -0
- scitex/_mcp_resources/_session.py +149 -0
- scitex/_mcp_tools/__init__.py +4 -0
- scitex/_mcp_tools/audio.py +66 -0
- scitex/_mcp_tools/diagram.py +11 -95
- scitex/_mcp_tools/introspect.py +210 -0
- scitex/_mcp_tools/plt.py +260 -305
- scitex/_mcp_tools/scholar.py +74 -0
- scitex/_mcp_tools/social.py +244 -0
- scitex/_mcp_tools/template.py +24 -0
- scitex/_mcp_tools/writer.py +21 -204
- scitex/ai/_gen_ai/_PARAMS.py +10 -7
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
- scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
- scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
- scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
- scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
- scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
- scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
- scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
- scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
- scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
- scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
- scitex/audio/README.md +40 -36
- scitex/audio/__init__.py +129 -61
- scitex/audio/_branding.py +185 -0
- scitex/audio/_mcp/__init__.py +32 -0
- scitex/audio/_mcp/handlers.py +59 -6
- scitex/audio/_mcp/speak_handlers.py +238 -0
- scitex/audio/_relay.py +225 -0
- scitex/audio/_tts.py +18 -10
- scitex/audio/engines/base.py +17 -10
- scitex/audio/engines/elevenlabs_engine.py +7 -2
- scitex/audio/mcp_server.py +228 -75
- scitex/canvas/README.md +1 -1
- scitex/canvas/editor/_dearpygui/__init__.py +25 -0
- scitex/canvas/editor/_dearpygui/_editor.py +147 -0
- scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
- scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
- scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
- scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
- scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
- scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
- scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
- scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
- scitex/canvas/editor/_dearpygui/_selection.py +295 -0
- scitex/canvas/editor/_dearpygui/_state.py +93 -0
- scitex/canvas/editor/_dearpygui/_utils.py +61 -0
- scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
- scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
- scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
- scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
- scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
- scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
- scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
- scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
- scitex/canvas/editor/flask_editor/_core.py +25 -1684
- scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
- scitex/cli/__init__.py +38 -43
- scitex/cli/audio.py +76 -27
- scitex/cli/capture.py +13 -20
- scitex/cli/introspect.py +481 -0
- scitex/cli/main.py +200 -109
- scitex/cli/mcp.py +60 -34
- scitex/cli/plt.py +357 -0
- scitex/cli/repro.py +15 -8
- scitex/cli/resource.py +15 -8
- scitex/cli/scholar/__init__.py +23 -8
- scitex/cli/scholar/_crossref_scitex.py +296 -0
- scitex/cli/scholar/_fetch.py +25 -3
- scitex/cli/social.py +314 -0
- scitex/cli/stats.py +15 -8
- scitex/cli/template.py +129 -12
- scitex/cli/tex.py +15 -8
- scitex/cli/writer.py +132 -8
- scitex/cloud/__init__.py +41 -2
- scitex/config/README.md +1 -1
- scitex/config/__init__.py +16 -2
- scitex/config/_env_registry.py +256 -0
- scitex/context/__init__.py +22 -0
- scitex/dev/__init__.py +20 -1
- scitex/diagram/__init__.py +42 -19
- scitex/diagram/mcp_server.py +13 -125
- scitex/gen/__init__.py +50 -14
- scitex/gen/_list_packages.py +4 -4
- scitex/introspect/__init__.py +82 -0
- scitex/introspect/_call_graph.py +303 -0
- scitex/introspect/_class_hierarchy.py +163 -0
- scitex/introspect/_core.py +41 -0
- scitex/introspect/_docstring.py +131 -0
- scitex/introspect/_examples.py +113 -0
- scitex/introspect/_imports.py +271 -0
- scitex/{gen/_inspect_module.py → introspect/_list_api.py} +43 -54
- scitex/introspect/_mcp/__init__.py +41 -0
- scitex/introspect/_mcp/handlers.py +233 -0
- scitex/introspect/_members.py +155 -0
- scitex/introspect/_resolve.py +89 -0
- scitex/introspect/_signature.py +131 -0
- scitex/introspect/_source.py +80 -0
- scitex/introspect/_type_hints.py +172 -0
- scitex/io/_save.py +1 -2
- scitex/io/bundle/README.md +1 -1
- scitex/logging/_formatters.py +19 -9
- scitex/mcp_server.py +98 -5
- scitex/os/__init__.py +4 -0
- scitex/{gen → os}/_check_host.py +4 -5
- scitex/plt/__init__.py +245 -550
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
- scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/plt/gallery/README.md +1 -1
- scitex/plt/utils/_hitmap/__init__.py +82 -0
- scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
- scitex/plt/utils/_hitmap/_color_application.py +346 -0
- scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
- scitex/plt/utils/_hitmap/_constants.py +40 -0
- scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
- scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
- scitex/plt/utils/_hitmap/_query.py +113 -0
- scitex/plt/utils/_hitmap.py +46 -1616
- scitex/plt/utils/_metadata/__init__.py +80 -0
- scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
- scitex/plt/utils/_metadata/_artists/_base.py +195 -0
- scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
- scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
- scitex/plt/utils/_metadata/_artists/_images.py +80 -0
- scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
- scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
- scitex/plt/utils/_metadata/_artists/_text.py +106 -0
- scitex/plt/utils/_metadata/_csv.py +416 -0
- scitex/plt/utils/_metadata/_detect.py +225 -0
- scitex/plt/utils/_metadata/_legend.py +127 -0
- scitex/plt/utils/_metadata/_rounding.py +117 -0
- scitex/plt/utils/_metadata/_verification.py +202 -0
- scitex/schema/README.md +1 -1
- scitex/scholar/__init__.py +8 -0
- scitex/scholar/_mcp/crossref_handlers.py +265 -0
- scitex/scholar/core/Scholar.py +63 -1700
- scitex/scholar/core/_mixins/__init__.py +36 -0
- scitex/scholar/core/_mixins/_enrichers.py +270 -0
- scitex/scholar/core/_mixins/_library_handlers.py +100 -0
- scitex/scholar/core/_mixins/_loaders.py +103 -0
- scitex/scholar/core/_mixins/_pdf_download.py +375 -0
- scitex/scholar/core/_mixins/_pipeline.py +312 -0
- scitex/scholar/core/_mixins/_project_handlers.py +125 -0
- scitex/scholar/core/_mixins/_savers.py +69 -0
- scitex/scholar/core/_mixins/_search.py +103 -0
- scitex/scholar/core/_mixins/_services.py +88 -0
- scitex/scholar/core/_mixins/_url_finding.py +105 -0
- scitex/scholar/crossref_scitex.py +367 -0
- scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/scholar/examples/00_run_all.sh +120 -0
- scitex/scholar/jobs/_executors.py +27 -3
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
- scitex/scholar/pdf_download/_cli.py +154 -0
- scitex/scholar/pdf_download/strategies/__init__.py +11 -8
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
- scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
- scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
- scitex/scholar/pipelines/_single_steps.py +71 -36
- scitex/scholar/storage/_LibraryManager.py +97 -1695
- scitex/scholar/storage/_mixins/__init__.py +30 -0
- scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
- scitex/scholar/storage/_mixins/_library_operations.py +218 -0
- scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
- scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
- scitex/scholar/storage/_mixins/_resolution.py +376 -0
- scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
- scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
- scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
- scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
- scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
- scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
- scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
- scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
- scitex/security/README.md +3 -3
- scitex/session/README.md +1 -1
- scitex/session/__init__.py +26 -7
- scitex/session/_decorator.py +1 -1
- scitex/sh/README.md +1 -1
- scitex/sh/__init__.py +7 -4
- scitex/social/__init__.py +155 -0
- scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/stats/_mcp/_handlers/__init__.py +31 -0
- scitex/stats/_mcp/_handlers/_corrections.py +113 -0
- scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
- scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
- scitex/stats/_mcp/_handlers/_format.py +94 -0
- scitex/stats/_mcp/_handlers/_normality.py +110 -0
- scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
- scitex/stats/_mcp/_handlers/_power.py +247 -0
- scitex/stats/_mcp/_handlers/_recommend.py +102 -0
- scitex/stats/_mcp/_handlers/_run_test.py +279 -0
- scitex/stats/_mcp/_handlers/_stars.py +48 -0
- scitex/stats/_mcp/handlers.py +19 -1171
- scitex/stats/auto/_stat_style.py +175 -0
- scitex/stats/auto/_style_definitions.py +411 -0
- scitex/stats/auto/_styles.py +22 -620
- scitex/stats/descriptive/__init__.py +11 -8
- scitex/stats/descriptive/_ci.py +39 -0
- scitex/stats/power/_power.py +15 -4
- scitex/str/__init__.py +2 -1
- scitex/str/_title_case.py +63 -0
- scitex/template/README.md +1 -1
- scitex/template/__init__.py +25 -10
- scitex/template/_code_templates.py +147 -0
- scitex/template/_mcp/handlers.py +81 -0
- scitex/template/_mcp/tool_schemas.py +55 -0
- scitex/template/_templates/__init__.py +51 -0
- scitex/template/_templates/audio.py +233 -0
- scitex/template/_templates/canvas.py +312 -0
- scitex/template/_templates/capture.py +268 -0
- scitex/template/_templates/config.py +43 -0
- scitex/template/_templates/diagram.py +294 -0
- scitex/template/_templates/io.py +107 -0
- scitex/template/_templates/module.py +53 -0
- scitex/template/_templates/plt.py +202 -0
- scitex/template/_templates/scholar.py +267 -0
- scitex/template/_templates/session.py +130 -0
- scitex/template/_templates/session_minimal.py +43 -0
- scitex/template/_templates/session_plot.py +67 -0
- scitex/template/_templates/session_stats.py +77 -0
- scitex/template/_templates/stats.py +323 -0
- scitex/template/_templates/writer.py +296 -0
- scitex/template/clone_writer_directory.py +5 -5
- scitex/ui/_backends/_email.py +10 -2
- scitex/ui/_backends/_webhook.py +5 -1
- scitex/web/_search_pubmed.py +10 -6
- scitex/writer/README.md +1 -1
- scitex/writer/_mcp/handlers.py +11 -744
- scitex/writer/_mcp/tool_schemas.py +5 -335
- scitex-2.15.2.dist-info/METADATA +648 -0
- {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/RECORD +246 -150
- scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
- scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
- scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
- scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
- scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
- scitex/diagram/_compile.py +0 -312
- scitex/diagram/_diagram.py +0 -355
- scitex/diagram/_mcp/__init__.py +0 -4
- scitex/diagram/_mcp/handlers.py +0 -400
- scitex/diagram/_mcp/tool_schemas.py +0 -157
- scitex/diagram/_presets.py +0 -173
- scitex/diagram/_schema.py +0 -182
- scitex/diagram/_split.py +0 -278
- scitex/gen/_ci.py +0 -12
- scitex/gen/_title_case.py +0 -89
- scitex/plt/_mcp/__init__.py +0 -4
- scitex/plt/_mcp/_handlers_annotation.py +0 -102
- scitex/plt/_mcp/_handlers_figure.py +0 -195
- scitex/plt/_mcp/_handlers_plot.py +0 -252
- scitex/plt/_mcp/_handlers_style.py +0 -219
- scitex/plt/_mcp/handlers.py +0 -74
- scitex/plt/_mcp/tool_schemas.py +0 -497
- scitex/plt/mcp_server.py +0 -231
- scitex/scholar/data/.gitkeep +0 -0
- scitex/scholar/data/README.md +0 -44
- scitex/scholar/data/bib_files/bibliography.bib +0 -1952
- scitex/scholar/data/bib_files/neurovista.bib +0 -277
- scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
- scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
- scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
- scitex/scholar/data/bib_files/openaccess.bib +0 -89
- scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
- scitex/scholar/data/bib_files/pac.bib +0 -698
- scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
- scitex/scholar/data/bib_files/pac_processed.bib +0 -0
- scitex/scholar/data/bib_files/pac_titles.txt +0 -75
- scitex/scholar/data/bib_files/paywalled.bib +0 -98
- scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
- scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
- scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
- scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
- scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
- scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
- scitex/scholar/data/bib_files/test_seizure.bib +0 -46
- scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
- scitex/scholar/data/impact_factor.db +0 -0
- scitex/scholar/examples/SUGGESTIONS.md +0 -865
- scitex/scholar/examples/dev.py +0 -38
- scitex-2.14.0.dist-info/METADATA +0 -1238
- /scitex/{gen → context}/_detect_environment.py +0 -0
- /scitex/{gen → context}/_get_notebook_path.py +0 -0
- /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/WHEEL +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/entry_points.txt +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Timestamp: "2026-01-24 (ywatanabe)"
|
|
3
|
+
# File: /home/ywatanabe/proj/scitex-python/src/scitex/ai/classification/timeseries/_sliding_window_core.py
|
|
4
|
+
|
|
5
|
+
"""Core TimeSeriesSlidingWindowSplit class without visualization."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Iterator, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from sklearn.model_selection import BaseCrossValidator
|
|
13
|
+
from sklearn.utils.validation import _num_samples
|
|
14
|
+
|
|
15
|
+
from scitex import logging
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
__all__ = ["TimeSeriesSlidingWindowSplitCore"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TimeSeriesSlidingWindowSplitCore(BaseCrossValidator):
    """Sliding window cross-validation for time series (core functionality).

    Creates train/test windows that slide through time with configurable
    behavior. Samples are ordered by ``timestamps`` (or by position when no
    timestamps are given) and every yielded array contains indices into the
    original, unsorted data.

    Parameters
    ----------
    window_size : int, optional
        Size of the training window. With ``expanding_window=True`` it is the
        size of the first (smallest) training window. Required if ``n_splits``
        is None; recomputed from the data on every split when ``n_splits`` is
        set.
    step_size : int, optional
        Step between consecutive windows. With ``overlapping_tests=False`` it
        defaults to ``test_size`` and is raised to ``test_size`` when a smaller
        value is given. With ``overlapping_tests=True`` it defaults to
        ``max(1, test_size // 2)``.
    test_size : int, optional
        Size of test window. Required if ``n_splits`` is None.
    gap : int, default=0
        Number of samples to skip between train and test windows.
    val_ratio : float, default=0.0
        Ratio of the training window used as validation set by
        ``split_with_val``.
    random_state : int, optional
        Seed for the random generator used by undersampling.
    overlapping_tests : bool, default=False
        If False, ``step_size`` is never smaller than ``test_size`` so that
        test windows do not overlap.
    expanding_window : bool, default=False
        If True, training window grows to include all past data (like
        sklearn's TimeSeriesSplit). If False, uses fixed sliding window of
        size ``window_size``.
    undersample : bool, default=False
        If True, balance classes in training sets by randomly undersampling
        every class down to the minority class count. Temporal order is
        maintained. Requires ``y`` in ``split()``; without ``y`` no
        undersampling is applied.
    n_splits : int, optional
        Target number of splits. If specified, ``window_size``, ``test_size``
        and ``step_size`` are derived from the number of samples when
        splitting, and ``get_n_splits`` reports this value.
    """

    def __init__(
        self,
        window_size: Optional[int] = None,
        step_size: Optional[int] = None,
        test_size: Optional[int] = None,
        gap: int = 0,
        val_ratio: float = 0.0,
        random_state: Optional[int] = None,
        overlapping_tests: bool = False,
        expanding_window: bool = False,
        undersample: bool = False,
        n_splits: Optional[int] = None,
    ):
        # Handle n_splits mode vs manual mode
        if n_splits is not None:
            self.n_splits_mode = True
            self._n_splits = n_splits
            # Placeholders only: _calculate_auto_sizes() overwrites both
            # values as soon as the number of samples is known.
            self.window_size = window_size if window_size is not None else 50
            self.test_size = test_size if test_size is not None else 10
        else:
            if window_size is None or test_size is None:
                raise ValueError(
                    "Either n_splits OR (window_size AND test_size) must be specified"
                )
            self.n_splits_mode = False
            self._n_splits = None
            self.window_size = window_size
            self.test_size = test_size

        self.gap = gap
        self.val_ratio = val_ratio
        self.random_state = random_state
        self.rng = np.random.default_rng(random_state)
        self.overlapping_tests = overlapping_tests
        self.expanding_window = expanding_window
        self.undersample = undersample

        # Handle step_size logic.
        # The raw ``test_size`` argument may be None in n_splits mode, so all
        # comparisons and arithmetic use the resolved attribute instead.
        resolved_test_size = self.test_size

        if not overlapping_tests:
            if step_size is not None and step_size < resolved_test_size:
                logger.warning(
                    f"overlapping_tests=False but step_size={step_size} < test_size={resolved_test_size}. "
                    f"Setting step_size=test_size={resolved_test_size}."
                )
                self.step_size = resolved_test_size
            elif step_size is None:
                self.step_size = resolved_test_size
                logger.info(
                    f"step_size not specified with overlapping_tests=False. "
                    f"Using step_size=test_size={resolved_test_size}."
                )
            else:
                self.step_size = step_size
        else:
            if step_size is None:
                self.step_size = max(1, resolved_test_size // 2)
                logger.info(
                    f"step_size not specified with overlapping_tests=True. "
                    f"Using step_size={self.step_size} (50% overlap)."
                )
            else:
                self.step_size = step_size

    def _prepare_inputs(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: Optional[np.ndarray],
    ) -> Tuple[int, np.ndarray, Optional[np.ndarray], np.ndarray]:
        """Normalize the inputs shared by ``split`` and ``split_with_val``.

        Converts ``y`` and ``timestamps`` to ndarrays, refreshes the automatic
        sizes in n_splits mode and computes the temporal ordering.

        Returns
        -------
        n_samples : int
            Number of samples in ``X``.
        sorted_indices : ndarray
            Original sample indices ordered by timestamp.
        y : ndarray or None
            Labels as ndarray (None is passed through).
        timestamps : ndarray
            Timestamps as ndarray (sequential positions if none were given).

        Raises
        ------
        ValueError
            If ``timestamps`` is not one-dimensional with one entry per
            sample, or if the effective ``step_size`` is not positive.
        """
        n_samples = _num_samples(X)

        if timestamps is None:
            timestamps = np.arange(n_samples)
        else:
            # Array-likes (lists, pandas Series) must support positional
            # fancy indexing further down.
            timestamps = np.asarray(timestamps)
            if timestamps.ndim != 1 or timestamps.shape[0] != n_samples:
                raise ValueError(
                    f"timestamps must be 1-D with one entry per sample: "
                    f"expected length {n_samples}, got shape {timestamps.shape}"
                )

        if y is not None:
            y = np.asarray(y)

        if self.n_splits_mode:
            self._calculate_auto_sizes(n_samples)

        # A non-positive step would never advance the expanding-window loops.
        if self.step_size < 1:
            raise ValueError(
                f"step_size must be a positive integer, got {self.step_size}"
            )

        # Stable sort keeps the input order of samples sharing a timestamp.
        sorted_indices = np.argsort(timestamps, kind="stable")
        return n_samples, sorted_indices, y, timestamps

    def _undersample_indices(
        self, train_indices: np.ndarray, y: np.ndarray, timestamps: np.ndarray
    ) -> np.ndarray:
        """Undersample majority class to balance training set.

        Maintains temporal order of samples.

        Parameters
        ----------
        train_indices : ndarray
            Original training indices
        y : ndarray
            Full label array
        timestamps : ndarray
            Full timestamp array

        Returns
        -------
        ndarray
            Undersampled training indices (sorted by timestamp). The input
            indices are returned unchanged when fewer than two classes are
            present.
        """
        train_indices = np.asarray(train_indices)
        y = np.asarray(y)
        timestamps = np.asarray(timestamps)

        train_labels = y[train_indices]
        unique_classes, class_counts = np.unique(train_labels, return_counts=True)

        if len(unique_classes) < 2:
            return train_indices

        min_count = class_counts.min()

        per_class = []
        for cls in unique_classes:
            cls_train_indices = train_indices[train_labels == cls]
            if len(cls_train_indices) > min_count:
                # Draw without replacement so no sample is duplicated.
                cls_train_indices = self.rng.choice(
                    cls_train_indices, size=min_count, replace=False
                )
            per_class.append(cls_train_indices)

        undersampled_indices = np.concatenate(per_class)
        # Restore chronological order after the per-class sampling.
        temporal_order = np.argsort(timestamps[undersampled_indices], kind="stable")
        return undersampled_indices[temporal_order]

    def _calculate_auto_sizes(self, n_samples: int) -> None:
        """Auto-calculate window and test sizes for n_splits mode.

        Overwrites ``window_size``, ``test_size`` and ``step_size`` in place;
        ``step_size`` is always set equal to the calculated ``test_size``.
        """
        n_splits = self._n_splits

        if self.expanding_window:
            window_size = max(1, n_samples // (n_splits + 1))
            available_for_test = n_samples - window_size - (n_splits * self.gap)
            test_size = max(1, available_for_test // n_splits)
            mode = "expanding_window"
        else:
            available = n_samples - (n_splits * self.gap)
            test_size = max(1, available // (n_splits + 1))
            window_size = test_size
            mode = "fixed window"

        self.window_size = window_size
        self.test_size = test_size
        self.step_size = test_size

        logger.info(
            f"n_splits={n_splits} with {mode}: "
            f"Calculated window_size={self.window_size}, test_size={self.test_size}"
        )

    def split(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray] = None,
        timestamps: Optional[np.ndarray] = None,
        groups: Optional[np.ndarray] = None,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Generate sliding window splits.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,), optional
            Target variable (only used for undersampling)
        timestamps : array-like, shape (n_samples,), optional
            Timestamps for temporal ordering. If None, uses sequential order
        groups : array-like, shape (n_samples,), optional
            Group labels (not used in this splitter)

        Yields
        ------
        train : ndarray
            Training set indices
        test : ndarray
            Test set indices

        Raises
        ------
        ValueError
            If ``timestamps`` does not match ``X`` or ``step_size`` is not
            positive.
        """
        n_samples, sorted_indices, y, timestamps = self._prepare_inputs(
            X, y, timestamps
        )

        if self.expanding_window:
            yield from self._split_expanding(n_samples, sorted_indices, y, timestamps)
        else:
            yield from self._split_fixed(n_samples, sorted_indices, y, timestamps)

    def _split_expanding(
        self,
        n_samples: int,
        sorted_indices: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: np.ndarray,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Generate expanding window splits.

        The training set always starts at the first sample and ends ``gap``
        samples before the test window. Logs a warning and yields nothing
        when there are too few samples for a single split.
        """
        min_train_size = self.window_size
        total_min = min_train_size + self.gap + self.test_size

        if n_samples < total_min:
            logger.warning(
                f"Not enough samples ({n_samples}) for even one split. "
                f"Need at least {total_min} samples."
            )
            return

        test_start_pos = min_train_size + self.gap

        while test_start_pos + self.test_size <= n_samples:
            test_end_pos = test_start_pos + self.test_size
            train_end_pos = test_start_pos - self.gap
            train_indices = sorted_indices[0:train_end_pos]
            test_indices = sorted_indices[test_start_pos:test_end_pos]

            if self.undersample and y is not None:
                train_indices = self._undersample_indices(train_indices, y, timestamps)

            assert len(train_indices) > 0 and len(test_indices) > 0, "Empty window"
            yield train_indices, test_indices
            test_start_pos += self.step_size

    def _split_fixed(
        self,
        n_samples: int,
        sorted_indices: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: np.ndarray,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Generate fixed sliding window splits.

        Each fold spans ``window_size`` training samples, ``gap`` skipped
        samples and ``test_size`` test samples.
        """
        total_window = self.window_size + self.gap + self.test_size

        for start in range(0, n_samples - total_window + 1, self.step_size):
            train_end = start + self.window_size
            test_start = train_end + self.gap
            test_end = test_start + self.test_size

            if test_end > n_samples:
                break

            train_indices = sorted_indices[start:train_end]
            test_indices = sorted_indices[test_start:test_end]

            if self.undersample and y is not None:
                train_indices = self._undersample_indices(train_indices, y, timestamps)

            assert len(train_indices) > 0 and len(test_indices) > 0, "Empty window"
            yield train_indices, test_indices

    def split_with_val(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray] = None,
        timestamps: Optional[np.ndarray] = None,
        groups: Optional[np.ndarray] = None,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """Generate sliding window splits with validation set.

        The validation set comes after training but before test, maintaining
        temporal order: train < val < test.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,), optional
            Target variable (only used for undersampling)
        timestamps : array-like, shape (n_samples,), optional
            Timestamps for temporal ordering. If None, uses sequential order
        groups : array-like, shape (n_samples,), optional
            Group labels (not used in this splitter)

        Yields
        ------
        train : ndarray
            Training set indices
        val : ndarray
            Validation set indices (empty integer array if ``val_ratio`` does
            not produce at least one validation sample)
        test : ndarray
            Test set indices

        Raises
        ------
        ValueError
            If ``timestamps`` does not match ``X`` or ``step_size`` is not
            positive.
        """
        n_samples, sorted_indices, y, timestamps = self._prepare_inputs(
            X, y, timestamps
        )

        val_size = int(self.window_size * self.val_ratio) if self.val_ratio > 0 else 0
        actual_train_size = self.window_size - val_size

        if self.expanding_window:
            yield from self._split_with_val_expanding(
                n_samples, sorted_indices, y, timestamps, val_size
            )
        else:
            yield from self._split_with_val_fixed(
                n_samples, sorted_indices, y, timestamps, val_size, actual_train_size
            )

    def _split_with_val_expanding(
        self,
        n_samples: int,
        sorted_indices: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: np.ndarray,
        val_size: int,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """Generate expanding window splits with validation.

        ``val_size`` only switches validation on or off here: the validation
        set of each fold is the trailing ``val_ratio`` share of the current
        (growing) train+val span, directly adjacent to the training part.
        """
        min_train_size = self.window_size
        total_min = min_train_size + self.gap + self.test_size

        if n_samples < total_min:
            logger.warning(
                f"Not enough samples ({n_samples}) for even one split. "
                f"Need at least {total_min} samples."
            )
            return

        test_start_pos = min_train_size + self.gap

        while test_start_pos + self.test_size <= n_samples:
            test_end_pos = test_start_pos + self.test_size
            train_val_end_pos = test_start_pos - self.gap

            if val_size > 0:
                current_val_size = int(train_val_end_pos * self.val_ratio)
                train_end_pos = train_val_end_pos - current_val_size
                train_indices = sorted_indices[0:train_end_pos]
                val_indices = sorted_indices[train_end_pos:train_val_end_pos]
            else:
                train_indices = sorted_indices[0:train_val_end_pos]
                # Integer dtype so the empty array is still a valid index.
                val_indices = np.empty(0, dtype=sorted_indices.dtype)

            test_indices = sorted_indices[test_start_pos:test_end_pos]

            if self.undersample and y is not None:
                train_indices = self._undersample_indices(train_indices, y, timestamps)
                if len(val_indices) > 0:
                    val_indices = self._undersample_indices(val_indices, y, timestamps)

            assert len(train_indices) > 0 and len(test_indices) > 0, "Empty window"
            yield train_indices, val_indices, test_indices
            test_start_pos += self.step_size

    def _split_with_val_fixed(
        self,
        n_samples: int,
        sorted_indices: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: np.ndarray,
        val_size: int,
        actual_train_size: int,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """Generate fixed window splits with validation.

        With a validation set the layout of a fold is
        train | gap | val | gap | test; without one it is train | gap | test.
        """
        has_val = val_size > 0
        # The gap occurs twice when a validation window is inserted.
        n_gaps = 2 if has_val else 1
        total_window = self.window_size + n_gaps * self.gap + self.test_size

        for start in range(0, n_samples - total_window + 1, self.step_size):
            train_end = start + actual_train_size
            val_start = train_end + (self.gap if has_val else 0)
            val_end = val_start + val_size
            test_start = (val_end if has_val else train_end) + self.gap
            test_end = test_start + self.test_size

            if test_end > n_samples:
                break

            train_indices = sorted_indices[start:train_end]
            if has_val:
                val_indices = sorted_indices[val_start:val_end]
            else:
                # Integer dtype so the empty array is still a valid index.
                val_indices = np.empty(0, dtype=sorted_indices.dtype)
            test_indices = sorted_indices[test_start:test_end]

            if self.undersample and y is not None:
                train_indices = self._undersample_indices(train_indices, y, timestamps)
                if len(val_indices) > 0:
                    val_indices = self._undersample_indices(val_indices, y, timestamps)

            assert len(train_indices) > 0 and len(test_indices) > 0, "Empty window"
            yield train_indices, val_indices, test_indices

    def get_n_splits(self, X=None, y=None, groups=None):
        """Calculate number of splits.

        In n_splits mode the requested number is returned as-is; the sizes
        used by ``split`` are derived from it, but the count of generated
        folds is not enforced there.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features), optional
            Training data (required to determine number of splits in manual mode)
        y : array-like, optional
            Not used
        groups : array-like, optional
            Not used

        Returns
        -------
        n_splits : int
            Number of splits. Returns -1 if X is None and not in n_splits mode.
        """
        if self.n_splits_mode:
            return self._n_splits

        if X is None:
            return -1

        n_samples = _num_samples(X)
        total_window = self.window_size + self.gap + self.test_size
        n_windows = (n_samples - total_window) // self.step_size + 1
        return max(0, n_windows)
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
# EOF
|