scitex 2.14.0__py3-none-any.whl → 2.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300) hide show
  1. scitex/__init__.py +71 -17
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +210 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +244 -0
  16. scitex/_mcp_tools/template.py +24 -0
  17. scitex/_mcp_tools/writer.py +21 -204
  18. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  19. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  20. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  21. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  22. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  23. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  24. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  25. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  26. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  27. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  28. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
  29. scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
  30. scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
  31. scitex/audio/README.md +40 -36
  32. scitex/audio/__init__.py +129 -61
  33. scitex/audio/_branding.py +185 -0
  34. scitex/audio/_mcp/__init__.py +32 -0
  35. scitex/audio/_mcp/handlers.py +59 -6
  36. scitex/audio/_mcp/speak_handlers.py +238 -0
  37. scitex/audio/_relay.py +225 -0
  38. scitex/audio/_tts.py +18 -10
  39. scitex/audio/engines/base.py +17 -10
  40. scitex/audio/engines/elevenlabs_engine.py +7 -2
  41. scitex/audio/mcp_server.py +228 -75
  42. scitex/canvas/README.md +1 -1
  43. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  44. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  45. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  46. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  47. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  48. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  49. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  50. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  51. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  52. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  53. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  54. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  55. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  56. scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
  57. scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
  58. scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
  59. scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
  60. scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
  61. scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
  62. scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
  63. scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
  64. scitex/canvas/editor/flask_editor/_core.py +25 -1684
  65. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  66. scitex/cli/__init__.py +38 -43
  67. scitex/cli/audio.py +76 -27
  68. scitex/cli/capture.py +13 -20
  69. scitex/cli/introspect.py +481 -0
  70. scitex/cli/main.py +200 -109
  71. scitex/cli/mcp.py +60 -34
  72. scitex/cli/plt.py +357 -0
  73. scitex/cli/repro.py +15 -8
  74. scitex/cli/resource.py +15 -8
  75. scitex/cli/scholar/__init__.py +23 -8
  76. scitex/cli/scholar/_crossref_scitex.py +296 -0
  77. scitex/cli/scholar/_fetch.py +25 -3
  78. scitex/cli/social.py +314 -0
  79. scitex/cli/stats.py +15 -8
  80. scitex/cli/template.py +129 -12
  81. scitex/cli/tex.py +15 -8
  82. scitex/cli/writer.py +132 -8
  83. scitex/cloud/__init__.py +41 -2
  84. scitex/config/README.md +1 -1
  85. scitex/config/__init__.py +16 -2
  86. scitex/config/_env_registry.py +256 -0
  87. scitex/context/__init__.py +22 -0
  88. scitex/dev/__init__.py +20 -1
  89. scitex/diagram/__init__.py +42 -19
  90. scitex/diagram/mcp_server.py +13 -125
  91. scitex/gen/__init__.py +50 -14
  92. scitex/gen/_list_packages.py +4 -4
  93. scitex/introspect/__init__.py +82 -0
  94. scitex/introspect/_call_graph.py +303 -0
  95. scitex/introspect/_class_hierarchy.py +163 -0
  96. scitex/introspect/_core.py +41 -0
  97. scitex/introspect/_docstring.py +131 -0
  98. scitex/introspect/_examples.py +113 -0
  99. scitex/introspect/_imports.py +271 -0
  100. scitex/{gen/_inspect_module.py → introspect/_list_api.py} +43 -54
  101. scitex/introspect/_mcp/__init__.py +41 -0
  102. scitex/introspect/_mcp/handlers.py +233 -0
  103. scitex/introspect/_members.py +155 -0
  104. scitex/introspect/_resolve.py +89 -0
  105. scitex/introspect/_signature.py +131 -0
  106. scitex/introspect/_source.py +80 -0
  107. scitex/introspect/_type_hints.py +172 -0
  108. scitex/io/_save.py +1 -2
  109. scitex/io/bundle/README.md +1 -1
  110. scitex/logging/_formatters.py +19 -9
  111. scitex/mcp_server.py +98 -5
  112. scitex/os/__init__.py +4 -0
  113. scitex/{gen → os}/_check_host.py +4 -5
  114. scitex/plt/__init__.py +245 -550
  115. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  116. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  117. scitex/plt/gallery/README.md +1 -1
  118. scitex/plt/utils/_hitmap/__init__.py +82 -0
  119. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  120. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  121. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  122. scitex/plt/utils/_hitmap/_constants.py +40 -0
  123. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  124. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  125. scitex/plt/utils/_hitmap/_query.py +113 -0
  126. scitex/plt/utils/_hitmap.py +46 -1616
  127. scitex/plt/utils/_metadata/__init__.py +80 -0
  128. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  129. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  130. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  131. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  132. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  133. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  134. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  135. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  136. scitex/plt/utils/_metadata/_csv.py +416 -0
  137. scitex/plt/utils/_metadata/_detect.py +225 -0
  138. scitex/plt/utils/_metadata/_legend.py +127 -0
  139. scitex/plt/utils/_metadata/_rounding.py +117 -0
  140. scitex/plt/utils/_metadata/_verification.py +202 -0
  141. scitex/schema/README.md +1 -1
  142. scitex/scholar/__init__.py +8 -0
  143. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  144. scitex/scholar/core/Scholar.py +63 -1700
  145. scitex/scholar/core/_mixins/__init__.py +36 -0
  146. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  147. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  148. scitex/scholar/core/_mixins/_loaders.py +103 -0
  149. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  150. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  151. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  152. scitex/scholar/core/_mixins/_savers.py +69 -0
  153. scitex/scholar/core/_mixins/_search.py +103 -0
  154. scitex/scholar/core/_mixins/_services.py +88 -0
  155. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  156. scitex/scholar/crossref_scitex.py +367 -0
  157. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  158. scitex/scholar/examples/00_run_all.sh +120 -0
  159. scitex/scholar/jobs/_executors.py +27 -3
  160. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  161. scitex/scholar/pdf_download/_cli.py +154 -0
  162. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  163. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  164. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  165. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  166. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  167. scitex/scholar/pipelines/_single_steps.py +71 -36
  168. scitex/scholar/storage/_LibraryManager.py +97 -1695
  169. scitex/scholar/storage/_mixins/__init__.py +30 -0
  170. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  171. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  172. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  173. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  174. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  175. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  176. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  177. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  178. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  179. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  180. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  181. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  182. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  183. scitex/security/README.md +3 -3
  184. scitex/session/README.md +1 -1
  185. scitex/session/__init__.py +26 -7
  186. scitex/session/_decorator.py +1 -1
  187. scitex/sh/README.md +1 -1
  188. scitex/sh/__init__.py +7 -4
  189. scitex/social/__init__.py +155 -0
  190. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  191. scitex/stats/_mcp/_handlers/__init__.py +31 -0
  192. scitex/stats/_mcp/_handlers/_corrections.py +113 -0
  193. scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
  194. scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
  195. scitex/stats/_mcp/_handlers/_format.py +94 -0
  196. scitex/stats/_mcp/_handlers/_normality.py +110 -0
  197. scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
  198. scitex/stats/_mcp/_handlers/_power.py +247 -0
  199. scitex/stats/_mcp/_handlers/_recommend.py +102 -0
  200. scitex/stats/_mcp/_handlers/_run_test.py +279 -0
  201. scitex/stats/_mcp/_handlers/_stars.py +48 -0
  202. scitex/stats/_mcp/handlers.py +19 -1171
  203. scitex/stats/auto/_stat_style.py +175 -0
  204. scitex/stats/auto/_style_definitions.py +411 -0
  205. scitex/stats/auto/_styles.py +22 -620
  206. scitex/stats/descriptive/__init__.py +11 -8
  207. scitex/stats/descriptive/_ci.py +39 -0
  208. scitex/stats/power/_power.py +15 -4
  209. scitex/str/__init__.py +2 -1
  210. scitex/str/_title_case.py +63 -0
  211. scitex/template/README.md +1 -1
  212. scitex/template/__init__.py +25 -10
  213. scitex/template/_code_templates.py +147 -0
  214. scitex/template/_mcp/handlers.py +81 -0
  215. scitex/template/_mcp/tool_schemas.py +55 -0
  216. scitex/template/_templates/__init__.py +51 -0
  217. scitex/template/_templates/audio.py +233 -0
  218. scitex/template/_templates/canvas.py +312 -0
  219. scitex/template/_templates/capture.py +268 -0
  220. scitex/template/_templates/config.py +43 -0
  221. scitex/template/_templates/diagram.py +294 -0
  222. scitex/template/_templates/io.py +107 -0
  223. scitex/template/_templates/module.py +53 -0
  224. scitex/template/_templates/plt.py +202 -0
  225. scitex/template/_templates/scholar.py +267 -0
  226. scitex/template/_templates/session.py +130 -0
  227. scitex/template/_templates/session_minimal.py +43 -0
  228. scitex/template/_templates/session_plot.py +67 -0
  229. scitex/template/_templates/session_stats.py +77 -0
  230. scitex/template/_templates/stats.py +323 -0
  231. scitex/template/_templates/writer.py +296 -0
  232. scitex/template/clone_writer_directory.py +5 -5
  233. scitex/ui/_backends/_email.py +10 -2
  234. scitex/ui/_backends/_webhook.py +5 -1
  235. scitex/web/_search_pubmed.py +10 -6
  236. scitex/writer/README.md +1 -1
  237. scitex/writer/_mcp/handlers.py +11 -744
  238. scitex/writer/_mcp/tool_schemas.py +5 -335
  239. scitex-2.15.2.dist-info/METADATA +648 -0
  240. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/RECORD +246 -150
  241. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  242. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  243. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  244. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  245. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  246. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  247. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  248. scitex/diagram/_compile.py +0 -312
  249. scitex/diagram/_diagram.py +0 -355
  250. scitex/diagram/_mcp/__init__.py +0 -4
  251. scitex/diagram/_mcp/handlers.py +0 -400
  252. scitex/diagram/_mcp/tool_schemas.py +0 -157
  253. scitex/diagram/_presets.py +0 -173
  254. scitex/diagram/_schema.py +0 -182
  255. scitex/diagram/_split.py +0 -278
  256. scitex/gen/_ci.py +0 -12
  257. scitex/gen/_title_case.py +0 -89
  258. scitex/plt/_mcp/__init__.py +0 -4
  259. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  260. scitex/plt/_mcp/_handlers_figure.py +0 -195
  261. scitex/plt/_mcp/_handlers_plot.py +0 -252
  262. scitex/plt/_mcp/_handlers_style.py +0 -219
  263. scitex/plt/_mcp/handlers.py +0 -74
  264. scitex/plt/_mcp/tool_schemas.py +0 -497
  265. scitex/plt/mcp_server.py +0 -231
  266. scitex/scholar/data/.gitkeep +0 -0
  267. scitex/scholar/data/README.md +0 -44
  268. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  269. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  270. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  271. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  272. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  273. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  274. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  275. scitex/scholar/data/bib_files/pac.bib +0 -698
  276. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  277. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  278. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  279. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  280. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  281. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  282. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  283. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  284. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  285. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  286. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  287. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  288. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  289. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  290. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  291. scitex/scholar/data/impact_factor.db +0 -0
  292. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  293. scitex/scholar/examples/dev.py +0 -38
  294. scitex-2.14.0.dist-info/METADATA +0 -1238
  295. /scitex/{gen → context}/_detect_environment.py +0 -0
  296. /scitex/{gen → context}/_get_notebook_path.py +0 -0
  297. /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
  298. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/WHEEL +0 -0
  299. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/entry_points.txt +0 -0
  300. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,467 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/ai/classification/timeseries/_sliding_window_core.py
4
+
5
+ """Core TimeSeriesSlidingWindowSplit class without visualization."""
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Iterator, Optional, Tuple
10
+
11
+ import numpy as np
12
+ from sklearn.model_selection import BaseCrossValidator
13
+ from sklearn.utils.validation import _num_samples
14
+
15
+ from scitex import logging
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ __all__ = ["TimeSeriesSlidingWindowSplitCore"]
20
+
21
+
22
class TimeSeriesSlidingWindowSplitCore(BaseCrossValidator):
    """Sliding window cross-validation for time series (core functionality).

    Creates train/test windows that slide through time with configurable behavior.

    Parameters
    ----------
    window_size : int, optional
        Size of training window (ignored if expanding_window=True or n_splits is set).
        Required if n_splits is None.
    step_size : int, optional
        Step between windows (overridden if overlapping_tests=False)
    test_size : int, optional
        Size of test window. Required if n_splits is None.
    gap : int, default=0
        Number of samples to skip between train and test windows
    val_ratio : float, default=0.0
        Ratio of validation set from training window
    random_state : int, optional
        Random seed for reproducibility
    overlapping_tests : bool, default=False
        If False, automatically sets step_size=test_size to ensure each sample
        is tested exactly once (like K-fold for time series)
    expanding_window : bool, default=False
        If True, training window grows to include all past data (like sklearn's
        TimeSeriesSplit). If False, uses fixed sliding window of size window_size.
    undersample : bool, default=False
        If True, balance classes in training sets by randomly undersampling
        the majority class to match the minority class count. Temporal order
        is maintained. Requires y labels in split().
    n_splits : int, optional
        Number of splits to generate. If specified, window_size and test_size
        are automatically calculated to create exactly n_splits folds.

    Raises
    ------
    ValueError
        If neither n_splits nor both window_size and test_size are given,
        if n_splits is smaller than 1, or if the resolved step_size is not
        positive in manual mode.
    """

    def __init__(
        self,
        window_size: Optional[int] = None,
        step_size: Optional[int] = None,
        test_size: Optional[int] = None,
        gap: int = 0,
        val_ratio: float = 0.0,
        random_state: Optional[int] = None,
        overlapping_tests: bool = False,
        expanding_window: bool = False,
        undersample: bool = False,
        n_splits: Optional[int] = None,
    ):
        # Handle n_splits mode vs manual mode
        if n_splits is not None:
            if n_splits < 1:
                raise ValueError(f"n_splits must be at least 1, got {n_splits}.")
            self.n_splits_mode = True
            self._n_splits = n_splits
            # Placeholder sizes; _calculate_auto_sizes() overwrites them once
            # the number of samples is known (i.e. when split() is called).
            self.window_size = window_size if window_size is not None else 50
            self.test_size = test_size if test_size is not None else 10
        else:
            if window_size is None or test_size is None:
                raise ValueError(
                    "Either n_splits OR (window_size AND test_size) must be specified"
                )
            self.n_splits_mode = False
            self._n_splits = None
            self.window_size = window_size
            self.test_size = test_size

        # From here on work with the resolved test size: the raw argument may
        # be None in n_splits mode, which would break the comparisons and the
        # integer division below.
        test_size = self.test_size

        self.gap = gap
        self.val_ratio = val_ratio
        self.random_state = random_state
        self.rng = np.random.default_rng(random_state)
        self.overlapping_tests = overlapping_tests
        self.expanding_window = expanding_window
        self.undersample = undersample

        # Handle step_size logic
        if not overlapping_tests:
            if step_size is not None and step_size < test_size:
                logger.warning(
                    f"overlapping_tests=False but step_size={step_size} < test_size={test_size}. "
                    f"Setting step_size=test_size={test_size}."
                )
                self.step_size = test_size
            elif step_size is None:
                self.step_size = test_size
                logger.info(
                    f"step_size not specified with overlapping_tests=False. "
                    f"Using step_size=test_size={test_size}."
                )
            else:
                self.step_size = step_size
        else:
            if step_size is None:
                self.step_size = max(1, test_size // 2)
                logger.info(
                    f"step_size not specified with overlapping_tests=True. "
                    f"Using step_size={self.step_size} (50% overlap)."
                )
            else:
                self.step_size = step_size

        # A non-positive step would keep the expanding-window loop from ever
        # advancing. In n_splits mode the step is recalculated before use, so
        # only manual mode needs the guard.
        if not self.n_splits_mode and self.step_size < 1:
            raise ValueError(
                f"step_size must be a positive integer, got {self.step_size}."
            )

    def _undersample_indices(
        self, train_indices: np.ndarray, y: np.ndarray, timestamps: np.ndarray
    ) -> np.ndarray:
        """Undersample majority class to balance training set.

        Maintains temporal order of samples.

        Parameters
        ----------
        train_indices : ndarray
            Original training indices
        y : ndarray
            Full label array
        timestamps : ndarray
            Full timestamp array

        Returns
        -------
        ndarray
            Undersampled training indices (sorted by timestamp). The input
            indices are returned unchanged when fewer than two classes are
            present.
        """
        train_indices = np.asarray(train_indices)
        y = np.asarray(y)
        timestamps = np.asarray(timestamps)

        train_labels = y[train_indices]
        unique_classes, class_counts = np.unique(train_labels, return_counts=True)

        # Nothing to balance with zero or one class.
        if len(unique_classes) < 2:
            return train_indices

        min_count = class_counts.min()

        kept_per_class = []
        for cls in unique_classes:
            cls_train_indices = train_indices[train_labels == cls]
            if len(cls_train_indices) > min_count:
                # Draw without replacement so no sample is duplicated.
                cls_train_indices = self.rng.choice(
                    cls_train_indices, size=min_count, replace=False
                )
            kept_per_class.append(cls_train_indices)

        undersampled_indices = np.concatenate(kept_per_class)
        # Sampling was done class by class; restore chronological order.
        temporal_order = np.argsort(timestamps[undersampled_indices])
        return undersampled_indices[temporal_order]

    def _calculate_auto_sizes(self, n_samples: int) -> None:
        """Auto-calculate window and test sizes for n_splits mode.

        Overwrites ``window_size``, ``test_size`` and ``step_size`` on the
        instance based on ``n_samples``, ``n_splits`` and ``gap``.
        """
        if self.expanding_window:
            min_window_size = max(1, n_samples // (self._n_splits + 1))
            available_for_test = (
                n_samples - min_window_size - (self._n_splits * self.gap)
            )
            calculated_test_size = max(1, available_for_test // self._n_splits)

            self.window_size = min_window_size
            self.test_size = calculated_test_size
            self.step_size = calculated_test_size

            logger.info(
                f"n_splits={self._n_splits} with expanding_window: "
                f"Calculated window_size={self.window_size}, test_size={self.test_size}"
            )
        else:
            available = n_samples - (self._n_splits * self.gap)
            calculated_test_size = max(1, available // (self._n_splits + 1))
            # Fixed mode uses a training window as long as the test window.
            calculated_window_size = calculated_test_size

            self.window_size = calculated_window_size
            self.test_size = calculated_test_size
            self.step_size = calculated_test_size

            logger.info(
                f"n_splits={self._n_splits} with fixed window: "
                f"Calculated window_size={self.window_size}, test_size={self.test_size}"
            )

    def _prepare_split(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: Optional[np.ndarray],
    ) -> Tuple[int, np.ndarray, Optional[np.ndarray], np.ndarray]:
        """Normalise split inputs and return (n_samples, sorted_indices, y, timestamps).

        ``sorted_indices`` holds the sample indices ordered by timestamp. In
        n_splits mode the window sizes are recalculated for ``n_samples``.
        """
        n_samples = _num_samples(X)

        if timestamps is None:
            timestamps = np.arange(n_samples)
        else:
            # Accept any array-like so the positional indexing below works.
            timestamps = np.asarray(timestamps)
            if len(timestamps) != n_samples:
                raise ValueError(
                    f"timestamps has {len(timestamps)} entries but X has "
                    f"{n_samples} samples."
                )

        if y is not None:
            y = np.asarray(y)

        sorted_indices = np.argsort(timestamps)

        if self.n_splits_mode:
            self._calculate_auto_sizes(n_samples)

        return n_samples, sorted_indices, y, timestamps

    @staticmethod
    def _too_few_samples(n_samples: int, total_min: int) -> bool:
        """Warn and return True when not even one split fits into the data."""
        if n_samples < total_min:
            logger.warning(
                f"Not enough samples ({n_samples}) for even one split. "
                f"Need at least {total_min} samples."
            )
            return True
        return False

    def split(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray] = None,
        timestamps: Optional[np.ndarray] = None,
        groups: Optional[np.ndarray] = None,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Generate sliding window splits.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,), optional
            Target variable
        timestamps : array-like, shape (n_samples,), optional
            Timestamps for temporal ordering. If None, uses sequential order
        groups : array-like, shape (n_samples,), optional
            Group labels (not used in this splitter)

        Yields
        ------
        train : ndarray
            Training set indices
        test : ndarray
            Test set indices
        """
        n_samples, sorted_indices, y, timestamps = self._prepare_split(
            X, y, timestamps
        )

        if self.expanding_window:
            yield from self._split_expanding(n_samples, sorted_indices, y, timestamps)
        else:
            yield from self._split_fixed(n_samples, sorted_indices, y, timestamps)

    def _split_expanding(
        self,
        n_samples: int,
        sorted_indices: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: np.ndarray,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Generate expanding window splits."""
        min_train_size = self.window_size
        total_min = min_train_size + self.gap + self.test_size

        if self._too_few_samples(n_samples, total_min):
            return

        test_start_pos = min_train_size + self.gap

        while test_start_pos + self.test_size <= n_samples:
            test_end_pos = test_start_pos + self.test_size
            # Training always starts at the first sample and ends `gap`
            # samples before the test window.
            train_end_pos = test_start_pos - self.gap
            train_indices = sorted_indices[0:train_end_pos]
            test_indices = sorted_indices[test_start_pos:test_end_pos]

            if self.undersample and y is not None:
                train_indices = self._undersample_indices(train_indices, y, timestamps)

            assert len(train_indices) > 0 and len(test_indices) > 0, "Empty window"
            yield train_indices, test_indices
            test_start_pos += self.step_size

    def _split_fixed(
        self,
        n_samples: int,
        sorted_indices: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: np.ndarray,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Generate fixed sliding window splits."""
        total_window = self.window_size + self.gap + self.test_size

        if self._too_few_samples(n_samples, total_window):
            return

        # The range bound guarantees every window fits inside the data.
        for start in range(0, n_samples - total_window + 1, self.step_size):
            train_end = start + self.window_size
            test_start = train_end + self.gap
            test_end = test_start + self.test_size

            train_indices = sorted_indices[start:train_end]
            test_indices = sorted_indices[test_start:test_end]

            if self.undersample and y is not None:
                train_indices = self._undersample_indices(train_indices, y, timestamps)

            assert len(train_indices) > 0 and len(test_indices) > 0, "Empty window"
            yield train_indices, test_indices

    def split_with_val(
        self,
        X: np.ndarray,
        y: Optional[np.ndarray] = None,
        timestamps: Optional[np.ndarray] = None,
        groups: Optional[np.ndarray] = None,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """Generate sliding window splits with validation set.

        The validation set comes after training but before test, maintaining
        temporal order: train < val < test.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,), optional
            Target variable
        timestamps : array-like, shape (n_samples,), optional
            Timestamps for temporal ordering. If None, uses sequential order
        groups : array-like, shape (n_samples,), optional
            Group labels (not used in this splitter)

        Yields
        ------
        train : ndarray
            Training set indices
        val : ndarray
            Validation set indices (an empty integer array when no validation
            set is carved out)
        test : ndarray
            Test set indices
        """
        n_samples, sorted_indices, y, timestamps = self._prepare_split(
            X, y, timestamps
        )

        val_size = int(self.window_size * self.val_ratio) if self.val_ratio > 0 else 0
        actual_train_size = self.window_size - val_size

        if self.expanding_window:
            yield from self._split_with_val_expanding(
                n_samples, sorted_indices, y, timestamps, val_size
            )
        else:
            yield from self._split_with_val_fixed(
                n_samples, sorted_indices, y, timestamps, val_size, actual_train_size
            )

    def _split_with_val_expanding(
        self,
        n_samples: int,
        sorted_indices: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: np.ndarray,
        val_size: int,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """Generate expanding window splits with validation.

        The validation set is the trailing ``val_ratio`` share of the samples
        that precede the gap; train and validation are adjacent.
        """
        min_train_size = self.window_size
        total_min = min_train_size + self.gap + self.test_size

        if self._too_few_samples(n_samples, total_min):
            return

        test_start_pos = min_train_size + self.gap

        while test_start_pos + self.test_size <= n_samples:
            test_end_pos = test_start_pos + self.test_size
            train_val_end_pos = test_start_pos - self.gap

            if val_size > 0:
                # The validation share grows together with the training window.
                current_val_size = int(train_val_end_pos * self.val_ratio)
                train_end_pos = train_val_end_pos - current_val_size
                train_indices = sorted_indices[0:train_end_pos]
                val_indices = sorted_indices[train_end_pos:train_val_end_pos]
            else:
                train_indices = sorted_indices[0:train_val_end_pos]
                # Empty slice keeps the integer dtype, so the result can be
                # used for indexing (np.array([]) would be float64).
                val_indices = sorted_indices[:0]

            test_indices = sorted_indices[test_start_pos:test_end_pos]

            if self.undersample and y is not None:
                train_indices = self._undersample_indices(train_indices, y, timestamps)
                if len(val_indices) > 0:
                    val_indices = self._undersample_indices(val_indices, y, timestamps)

            assert len(train_indices) > 0 and len(test_indices) > 0, "Empty window"
            yield train_indices, val_indices, test_indices
            test_start_pos += self.step_size

    def _split_with_val_fixed(
        self,
        n_samples: int,
        sorted_indices: np.ndarray,
        y: Optional[np.ndarray],
        timestamps: np.ndarray,
        val_size: int,
        actual_train_size: int,
    ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """Generate fixed window splits with validation.

        With a validation set the layout is train, gap, val, gap, test;
        without one it is train, gap, test.
        """
        # A validation set adds a second gap (train -> val) to the span.
        val_gap = self.gap if val_size > 0 else 0
        total_window = self.window_size + self.gap + self.test_size + val_gap

        if self._too_few_samples(n_samples, total_window):
            return

        # The range bound guarantees every window fits inside the data.
        for start in range(0, n_samples - total_window + 1, self.step_size):
            train_end = start + actual_train_size
            val_start = train_end + val_gap
            val_end = val_start + val_size
            test_start = val_end + self.gap
            test_end = test_start + self.test_size

            train_indices = sorted_indices[start:train_end]
            # Empty when val_size == 0, and still an integer index array.
            val_indices = sorted_indices[val_start:val_end]
            test_indices = sorted_indices[test_start:test_end]

            if self.undersample and y is not None:
                train_indices = self._undersample_indices(train_indices, y, timestamps)
                if len(val_indices) > 0:
                    val_indices = self._undersample_indices(val_indices, y, timestamps)

            assert len(train_indices) > 0 and len(test_indices) > 0, "Empty window"
            yield train_indices, val_indices, test_indices

    def get_n_splits(self, X=None, y=None, groups=None):
        """Calculate number of splits.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features), optional
            Training data (required to determine number of splits in manual mode)
        y : array-like, optional
            Not used
        groups : array-like, optional
            Not used

        Returns
        -------
        n_splits : int
            Number of splits. In n_splits mode this is the requested value.
            Returns -1 if X is None and not in n_splits mode.
        """
        if self.n_splits_mode:
            return self._n_splits

        if X is None:
            return -1

        n_samples = _num_samples(X)
        total_window = self.window_size + self.gap + self.test_size
        n_windows = (n_samples - total_window) // self.step_size + 1
        return max(0, n_windows)
465
+
466
+
467
+ # EOF