scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py
@@ -0,0 +1,1716 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # Timestamp: "2025-10-03 03:22:45 (ywatanabe)"
+ # File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py
+ # ----------------------------------------
+ from __future__ import annotations
+ import os
+ __FILE__ = (
+     "./src/scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py"
+ )
+ __DIR__ = os.path.dirname(__FILE__)
+ # ----------------------------------------
+
+ """
+ Functionalities:
+   - Implements sliding window cross-validation for time series
+   - Creates overlapping train/test windows that slide through time
+   - Supports temporal gaps between train and test sets
+   - Provides visualization with scatter plots showing actual data points
+   - Validates temporal order in all windows
+   - Ensures no data leakage between train and test sets
+
+ Dependencies:
+   - packages:
+     - numpy
+     - sklearn
+     - matplotlib
+     - scitex
+
+ IO:
+   - input-files:
+     - None (generates synthetic data for demonstration)
+   - output-files:
+     - ./sliding_window_demo.png (visualization with scatter plots)
+ """
+
+ """Imports"""
+ import argparse
+ from typing import Iterator, Optional, Tuple
+
+ import matplotlib.patches as patches
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import scitex as stx
+ from scitex import logging
+ from sklearn.model_selection import BaseCrossValidator
+ from sklearn.utils.validation import _num_samples
+
+ logger = logging.getLogger(__name__)
+
+ COLORS = stx.plt.color.PARAMS
+ COLORS["RGBA_NORM"]
+
+
+ class TimeSeriesSlidingWindowSplit(BaseCrossValidator):
+     """
+     Sliding window cross-validation for time series.
+
+     Creates train/test windows that slide through time with configurable behavior.
+
+     Parameters
+     ----------
+     window_size : int, optional
+         Size of training window (ignored if expanding_window=True or n_splits is set).
+         Required if n_splits is None.
+     step_size : int, optional
+         Step between windows (overridden if overlapping_tests=False)
+     test_size : int, optional
+         Size of test window. Required if n_splits is None.
+     gap : int, default=0
+         Number of samples to skip between train and test windows
+     val_ratio : float, default=0.0
+         Ratio of validation set from training window
+     random_state : int, optional
+         Random seed for reproducibility
+     overlapping_tests : bool, default=False
+         If False, automatically sets step_size=test_size to ensure each sample
+         is tested exactly once (like K-fold for time series)
+     expanding_window : bool, default=False
+         If True, training window grows to include all past data (like sklearn's
+         TimeSeriesSplit). If False, uses fixed sliding window of size window_size.
+     undersample : bool, default=False
+         If True, balance classes in training sets by randomly undersampling
+         the majority class to match the minority class count. Temporal order
+         is maintained. Requires y labels in split().
+     n_splits : int, optional
+         Number of splits to generate. If specified, window_size and test_size
+         are automatically calculated to create exactly n_splits folds.
+         Cannot be used together with manual window_size/test_size specification.
+
+     Examples
+     --------
+     >>> from scitex.ml.classification import TimeSeriesSlidingWindowSplit
+     >>> import numpy as np
+     >>>
+     >>> X = np.random.randn(100, 10)
+     >>> y = np.random.randint(0, 2, 100)
+     >>> timestamps = np.arange(100)
+     >>>
+     >>> # Fixed window, non-overlapping tests (default)
+     >>> swcv = TimeSeriesSlidingWindowSplit(window_size=50, test_size=10, gap=5)
+     >>> for train_idx, test_idx in swcv.split(X, y, timestamps):
+     ...     print(f"Train: {len(train_idx)}, Test: {len(test_idx)}")
+     >>>
+     >>> # Expanding window (use all past data)
+     >>> swcv = TimeSeriesSlidingWindowSplit(
+     ...     window_size=50, test_size=10, gap=5, expanding_window=True
+     ... )
+     >>> for train_idx, test_idx in swcv.split(X, y, timestamps):
+     ...     print(f"Train: {len(train_idx)}, Test: {len(test_idx)}")  # Train grows!
+     >>>
+     >>> # Using n_splits (automatically calculates window and test sizes)
+     >>> swcv = TimeSeriesSlidingWindowSplit(
+     ...     n_splits=5, gap=0, expanding_window=True, undersample=True
+     ... )
+     >>> for train_idx, test_idx in swcv.split(X, y, timestamps):
+     ...     print(f"Train: {len(train_idx)}, Test: {len(test_idx)}")
+     """
+
+     def __init__(
+         self,
+         window_size: Optional[int] = None,
+         step_size: Optional[int] = None,
+         test_size: Optional[int] = None,
+         gap: int = 0,
+         val_ratio: float = 0.0,
+         random_state: Optional[int] = None,
+         overlapping_tests: bool = False,
+         expanding_window: bool = False,
+         undersample: bool = False,
+         n_splits: Optional[int] = None,
+     ):
+         # Handle n_splits mode vs manual mode
+         if n_splits is not None:
+             # n_splits mode: automatically calculate window_size and test_size
+             self.n_splits_mode = True
+             self._n_splits = n_splits
+             # Use placeholder values, will be calculated in split()
+             self.window_size = window_size if window_size is not None else 50
+             self.test_size = test_size if test_size is not None else 10
+         else:
+             # Manual mode: require window_size and test_size
+             if window_size is None or test_size is None:
+                 raise ValueError(
+                     "Either n_splits OR (window_size AND test_size) must be specified"
+                 )
+             self.n_splits_mode = False
+             self._n_splits = None
+             self.window_size = window_size
+             self.test_size = test_size
+
+         self.gap = gap
+         self.val_ratio = val_ratio
+         self.random_state = random_state
+         self.rng = np.random.default_rng(random_state)
+         self.overlapping_tests = overlapping_tests
+         self.expanding_window = expanding_window
+         self.undersample = undersample
+
+         # Handle step_size logic
+         if not overlapping_tests:
+             # overlapping_tests=False: ensure non-overlapping tests
+             if step_size is not None and step_size < test_size:
+                 logger.warning(
+                     f"overlapping_tests=False but step_size={step_size} < test_size={test_size}. "
+                     f"This would cause test overlap. Setting step_size=test_size={test_size}."
+                 )
+                 self.step_size = test_size
+             elif step_size is None:
+                 # Default: non-overlapping tests
+                 self.step_size = test_size
+                 logger.info(
+                     f"step_size not specified with overlapping_tests=False. "
+                     f"Using step_size=test_size={test_size} for non-overlapping tests."
+                 )
+             else:
+                 # step_size >= test_size: acceptable, no overlap
+                 self.step_size = step_size
+         else:
+             # overlapping_tests=True: allow any step_size
+             if step_size is None:
+                 # Default for overlapping: half the test size for 50% overlap
+                 self.step_size = max(1, test_size // 2)
+                 logger.info(
+                     f"step_size not specified with overlapping_tests=True. "
+                     f"Using step_size={self.step_size} (50% overlap)."
+                 )
+             else:
+                 self.step_size = step_size
+
+     def _undersample_indices(
+         self, train_indices: np.ndarray, y: np.ndarray, timestamps: np.ndarray
+     ) -> np.ndarray:
+         """
+         Undersample majority class to balance training set.
+
+         Maintains temporal order of samples.
+
+         Parameters
+         ----------
+         train_indices : ndarray
+             Original training indices
+         y : ndarray
+             Full label array
+         timestamps : ndarray
+             Full timestamp array
+
+         Returns
+         -------
+         ndarray
+             Undersampled training indices (sorted by timestamp)
+         """
+         # Get labels for training indices
+         train_labels = y[train_indices]
+
+         # Find unique classes and their counts
+         unique_classes, class_counts = np.unique(
+             train_labels, return_counts=True
+         )
+
+         if len(unique_classes) < 2:
+             # Only one class, no undersampling needed
+             return train_indices
+
+         # Find minority class count
+         min_count = class_counts.min()
+
+         # Undersample each class to match minority class count
+         undersampled_indices = []
+         for cls in unique_classes:
+             # Find indices of this class within train_indices
+             cls_mask = train_labels == cls
+             cls_train_indices = train_indices[cls_mask]
+
+             if len(cls_train_indices) > min_count:
+                 # Randomly select min_count samples
+                 selected = self.rng.choice(
+                     cls_train_indices, size=min_count, replace=False
+                 )
+                 undersampled_indices.extend(selected)
+             else:
+                 # Keep all samples from minority class
+                 undersampled_indices.extend(cls_train_indices)
+
+         # Convert to array and sort by timestamp to maintain temporal order
+         undersampled_indices = np.array(undersampled_indices)
+         temporal_order = np.argsort(timestamps[undersampled_indices])
+         undersampled_indices = undersampled_indices[temporal_order]
+
+         return undersampled_indices
+
+     def split(
+         self,
+         X: np.ndarray,
+         y: Optional[np.ndarray] = None,
+         timestamps: Optional[np.ndarray] = None,
+         groups: Optional[np.ndarray] = None,
+     ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
+         """
+         Generate sliding window splits.
+
+         Parameters
+         ----------
+         X : array-like, shape (n_samples, n_features)
+             Training data
+         y : array-like, shape (n_samples,), optional
+             Target variable
+         timestamps : array-like, shape (n_samples,), optional
+             Timestamps for temporal ordering. If None, uses sequential order
+         groups : array-like, shape (n_samples,), optional
+             Group labels (not used in this splitter)
+
+         Yields
+         ------
+         train : ndarray
+             Training set indices
+         test : ndarray
+             Test set indices
+         """
+         if timestamps is None:
+             timestamps = np.arange(len(X))
+
+         n_samples = _num_samples(X)
+         indices = np.arange(n_samples)
+
+         # Sort by timestamp to get temporal order
+         time_order = np.argsort(timestamps)
+         sorted_indices = indices[time_order]
+
+         # Auto-calculate sizes if using n_splits mode
+         if self.n_splits_mode:
+             # Calculate test_size to create exactly n_splits folds
+             # Formula: n_samples = window_size + (n_splits * (test_size + gap))
+             # For expanding window, window_size is minimum training size
+             # We want non-overlapping tests by default
+
+             if self.expanding_window:
+                 # Expanding window: start with minimum window, test slides forward
+                 # Let's use 20% of data as initial window (similar to sklearn)
+                 min_window_size = max(1, n_samples // (self._n_splits + 1))
+                 available_for_test = (
+                     n_samples - min_window_size - (self._n_splits * self.gap)
+                 )
+                 calculated_test_size = max(
+                     1, available_for_test // self._n_splits
+                 )
+
+                 # Set calculated values
+                 self.window_size = min_window_size
+                 self.test_size = calculated_test_size
+                 self.step_size = (
+                     calculated_test_size  # Non-overlapping by default
+                 )
+
+                 logger.info(
+                     f"n_splits={self._n_splits} with expanding_window: "
+                     f"Calculated window_size={self.window_size}, test_size={self.test_size}"
+                 )
+             else:
+                 # Fixed window: calculate window and test size
+                 # We want: n_samples = window_size + (n_splits * (test_size + gap))
+                 # Let's make window_size same as test_size for simplicity
+                 available = n_samples - (self._n_splits * self.gap)
+                 calculated_test_size = max(
+                     1, available // (self._n_splits + 1)
+                 )
+                 calculated_window_size = calculated_test_size
+
+                 # Set calculated values
+                 self.window_size = calculated_window_size
+                 self.test_size = calculated_test_size
+                 self.step_size = (
+                     calculated_test_size  # Non-overlapping by default
+                 )
+
+                 logger.info(
+                     f"n_splits={self._n_splits} with fixed window: "
+                     f"Calculated window_size={self.window_size}, test_size={self.test_size}"
+                 )
+
+         if self.expanding_window:
+             # Expanding window: training set grows to include all past data
+             # Start with minimum window_size, test slides forward
+             min_train_size = self.window_size
+             total_min = min_train_size + self.gap + self.test_size
+
+             if n_samples < total_min:
+                 logger.warning(
+                     f"Not enough samples ({n_samples}) for even one split. "
+                     f"Need at least {total_min} samples."
+                 )
+                 return
+
+             # First fold starts at window_size
+             test_start_pos = min_train_size + self.gap
+
+             while test_start_pos + self.test_size <= n_samples:
+                 test_end_pos = test_start_pos + self.test_size
+
+                 # Training includes all data from start to before gap
+                 train_end_pos = test_start_pos - self.gap
+                 train_indices = sorted_indices[0:train_end_pos]
+                 test_indices = sorted_indices[test_start_pos:test_end_pos]
+
+                 # Apply undersampling if enabled and y is provided
+                 if self.undersample and y is not None:
+                     train_indices = self._undersample_indices(
+                         train_indices, y, timestamps
+                     )
+
+                 assert (
+                     len(train_indices) > 0 and len(test_indices) > 0
+                 ), "Empty window"
+
+                 yield train_indices, test_indices
+
+                 # Move test window forward by step_size
+                 test_start_pos += self.step_size
+
+         else:
+             # Fixed sliding window: window slides through data
+             total_window = self.window_size + self.gap + self.test_size
+
+             for start in range(
+                 0, n_samples - total_window + 1, self.step_size
+             ):
+                 # These positions are in the sorted (temporal) domain
+                 train_end = start + self.window_size
+                 test_start = train_end + self.gap
+                 test_end = test_start + self.test_size
+
+                 if test_end > n_samples:
+                     break
+
+                 # Extract indices from the temporally sorted sequence
+                 train_indices = sorted_indices[start:train_end]
+                 test_indices = sorted_indices[test_start:test_end]
+
+                 # Apply undersampling if enabled and y is provided
+                 if self.undersample and y is not None:
+                     train_indices = self._undersample_indices(
+                         train_indices, y, timestamps
+                     )
+
+                 assert (
+                     len(train_indices) > 0 and len(test_indices) > 0
+                 ), "Empty window"
+
+                 yield train_indices, test_indices
+
+     def split_with_val(
+         self,
+         X: np.ndarray,
+         y: Optional[np.ndarray] = None,
+         timestamps: Optional[np.ndarray] = None,
+         groups: Optional[np.ndarray] = None,
+     ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
+         """
+         Generate sliding window splits with validation set.
+
+         The validation set comes after training but before test, maintaining
+         temporal order: train < val < test.
+
+         Parameters
+         ----------
+         X : array-like, shape (n_samples, n_features)
+             Training data
+         y : array-like, shape (n_samples,), optional
+             Target variable
+         timestamps : array-like, shape (n_samples,), optional
+             Timestamps for temporal ordering. If None, uses sequential order
+         groups : array-like, shape (n_samples,), optional
+             Group labels (not used in this splitter)
+
+         Yields
+         ------
+         train : ndarray
+             Training set indices
+         val : ndarray
+             Validation set indices
+         test : ndarray
+             Test set indices
+         """
+         if timestamps is None:
+             timestamps = np.arange(len(X))
+
+         n_samples = _num_samples(X)
+         indices = np.arange(n_samples)
+
+         # Sort by timestamp to get temporal order
+         time_order = np.argsort(timestamps)
+         sorted_indices = indices[time_order]
+
+         # Auto-calculate sizes if using n_splits mode
+         if self.n_splits_mode:
+             if self.expanding_window:
+                 min_window_size = max(1, n_samples // (self._n_splits + 1))
+                 available_for_test = (
+                     n_samples - min_window_size - (self._n_splits * self.gap)
+                 )
+                 calculated_test_size = max(
+                     1, available_for_test // self._n_splits
+                 )
+                 self.window_size = min_window_size
+                 self.test_size = calculated_test_size
+                 self.step_size = calculated_test_size
+             else:
+                 available = n_samples - (self._n_splits * self.gap)
+                 calculated_test_size = max(
+                     1, available // (self._n_splits + 1)
+                 )
+                 calculated_window_size = calculated_test_size
+                 self.window_size = calculated_window_size
+                 self.test_size = calculated_test_size
+                 self.step_size = calculated_test_size
+
+         # Calculate validation size from training window
+         val_size = (
+             int(self.window_size * self.val_ratio) if self.val_ratio > 0 else 0
+         )
+         actual_train_size = self.window_size - val_size
+
+         if self.expanding_window:
+             # Expanding window with validation
+             min_train_size = self.window_size
+             total_min = min_train_size + self.gap + self.test_size
+
+             if n_samples < total_min:
+                 logger.warning(
+                     f"Not enough samples ({n_samples}) for even one split. "
+                     f"Need at least {total_min} samples."
+                 )
+                 return
+
+             # Calculate positions for validation and test
+             test_start_pos = min_train_size + self.gap
+
+             while test_start_pos + self.test_size <= n_samples:
+                 test_end_pos = test_start_pos + self.test_size
+
+                 # Training + validation comes before gap
+                 train_val_end_pos = test_start_pos - self.gap
+
+                 # Split train/val from the expanding window
+                 if val_size > 0:
+                     # Calculate validation size dynamically based on current expanding window
+                     # This ensures val_ratio is respected across all folds as window expands
+                     current_val_size = int(train_val_end_pos * self.val_ratio)
+                     train_end_pos = train_val_end_pos - current_val_size
+                     train_indices = sorted_indices[0:train_end_pos]
+                     val_indices = sorted_indices[
+                         train_end_pos:train_val_end_pos
+                     ]
+                 else:
+                     train_indices = sorted_indices[0:train_val_end_pos]
+                     val_indices = np.array([])
+
+                 test_indices = sorted_indices[test_start_pos:test_end_pos]
+
+                 # Apply undersampling if enabled and y is provided
+                 if self.undersample and y is not None:
+                     train_indices = self._undersample_indices(
+                         train_indices, y, timestamps
+                     )
+                     # Also undersample validation set if it exists
+                     if len(val_indices) > 0:
+                         val_indices = self._undersample_indices(
+                             val_indices, y, timestamps
+                         )
+
+                 assert (
+                     len(train_indices) > 0 and len(test_indices) > 0
+                 ), "Empty window"
+
+                 yield train_indices, val_indices, test_indices
+
+                 # Move test window forward by step_size
+                 test_start_pos += self.step_size
+
+         else:
+             # Fixed sliding window with validation
+             total_window = self.window_size + self.gap + self.test_size
+
+             for start in range(
+                 0, n_samples - total_window + 1, self.step_size
+             ):
+                 # These positions are in the sorted (temporal) domain
+                 train_end = start + actual_train_size
+
+                 # Validation comes after train with optional gap
551
+ val_start = train_end + (self.gap if val_size > 0 else 0)
552
+ val_end = val_start + val_size
553
+
554
+ # Test comes after validation with gap
555
+ test_start = (
556
+ val_end + self.gap
557
+ if val_size > 0
558
+ else train_end + self.gap
559
+ )
560
+ test_end = test_start + self.test_size
561
+
562
+ if test_end > n_samples:
563
+ break
564
+
565
+ # Extract indices from the temporally sorted sequence
566
+ train_indices = sorted_indices[start:train_end]
567
+ val_indices = (
568
+ sorted_indices[val_start:val_end]
569
+ if val_size > 0
570
+ else np.array([])
571
+ )
572
+ test_indices = sorted_indices[test_start:test_end]
573
+
574
+ # Apply undersampling if enabled and y is provided
575
+ if self.undersample and y is not None:
576
+ train_indices = self._undersample_indices(
577
+ train_indices, y, timestamps
578
+ )
579
+ # Also undersample validation set if it exists
580
+ if len(val_indices) > 0:
581
+ val_indices = self._undersample_indices(
582
+ val_indices, y, timestamps
583
+ )
584
+
585
+                # Guard against empty train and test windows before yielding
586
+ assert (
587
+ len(train_indices) > 0 and len(test_indices) > 0
588
+ ), "Empty window"
589
+
590
+ yield train_indices, val_indices, test_indices
591
+
592
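Continuing the sketch above, a minimal example of consuming the three-way splits for model selection; scikit-learn's LogisticRegression is used purely as a stand-in estimator and is not part of this module:

```python
from sklearn.linear_model import LogisticRegression

splitter = TimeSeriesSlidingWindowSplit(
    window_size=50, test_size=20, gap=5, val_ratio=0.2,
)
for train_idx, val_idx, test_idx in splitter.split_with_val(X, y, timestamps):
    # Temporal order is train < val < test; val_idx is empty when val_ratio == 0
    model = LogisticRegression().fit(X[train_idx], y[train_idx])
    val_score = model.score(X[val_idx], y[val_idx])      # use for hyperparameter selection
    test_score = model.score(X[test_idx], y[test_idx])   # report on strictly future data
```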
+ def get_n_splits(self, X=None, y=None, groups=None):
593
+ """
594
+ Calculate number of splits.
595
+
596
+ Parameters
597
+ ----------
598
+ X : array-like, shape (n_samples, n_features), optional
599
+ Training data (required to determine number of splits in manual mode)
600
+ y : array-like, optional
601
+ Not used
602
+ groups : array-like, optional
603
+ Not used
604
+
605
+ Returns
606
+ -------
607
+ n_splits : int
608
+            Number of splits. Returns -1 when X is None in manual mode, since the count cannot be determined without data.
609
+ """
610
+ # If using n_splits mode, return the specified n_splits
611
+ if self.n_splits_mode:
612
+ return self._n_splits
613
+
614
+ # Manual mode: need data to calculate
615
+ if X is None:
616
+ return -1 # Can't determine without data
617
+
618
+ n_samples = _num_samples(X)
619
+ total_window = self.window_size + self.gap + self.test_size
620
+ n_windows = (n_samples - total_window) // self.step_size + 1
621
+ return max(0, n_windows)
622
+
623
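To make the fold-count formula above concrete, a small worked example in manual mode, assuming step_size equals test_size (as the n_splits auto-calculation sets it; an explicit step_size would change the result):

```python
n_samples, window_size, gap, test_size, step_size = 200, 50, 5, 20, 20
total_window = window_size + gap + test_size              # 50 + 5 + 20 = 75
n_windows = (n_samples - total_window) // step_size + 1   # (200 - 75) // 20 + 1 = 7
```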
+ def plot_splits(
624
+ self, X, y=None, timestamps=None, figsize=(12, 6), save_path=None
625
+ ):
626
+ """
627
+ Visualize the sliding window splits as rectangles.
628
+
629
+ Shows train (blue), validation (green), and test (red) sets.
630
+ When val_ratio=0, only shows train and test.
631
+ When undersampling is enabled, shows dropped samples in gray.
632
+
633
+ Parameters
634
+ ----------
635
+ X : array-like
636
+ Training data
637
+ y : array-like, optional
638
+ Target variable (required for undersampling visualization)
639
+ timestamps : array-like, optional
640
+ Timestamps (if None, uses sample indices)
641
+ figsize : tuple, default (12, 6)
642
+ Figure size
643
+ save_path : str, optional
644
+ Path to save the plot
645
+
646
+ Returns
647
+ -------
648
+ fig : matplotlib.figure.Figure
649
+ The created figure
650
+ """
651
+ # Use sample indices if no timestamps provided
652
+ if timestamps is None:
653
+ timestamps = np.arange(len(X))
654
+
655
+ # Get temporal ordering
656
+ time_order = np.argsort(timestamps)
657
+ sorted_timestamps = timestamps[time_order]
658
+
659
+ # Get splits WITH undersampling (if enabled)
660
+ if self.val_ratio > 0:
661
+ splits = list(self.split_with_val(X, y, timestamps))[:10]
662
+ split_type = "train-val-test"
663
+ else:
664
+ splits = list(self.split(X, y, timestamps))[:10]
665
+ split_type = "train-test"
666
+
667
+ if not splits:
668
+ raise ValueError("No splits generated")
669
+
670
+ # If undersampling is enabled, also get splits WITHOUT undersampling to show dropped samples
671
+ splits_no_undersample = None
672
+ if self.undersample and y is not None:
673
+ original_undersample = self.undersample
674
+ self.undersample = False # Temporarily disable
675
+ if self.val_ratio > 0:
676
+ splits_no_undersample = list(
677
+ self.split_with_val(X, y, timestamps)
678
+ )[:10]
679
+ else:
680
+ splits_no_undersample = list(self.split(X, y, timestamps))[:10]
681
+ self.undersample = original_undersample # Restore
682
+
683
+ # Create figure
684
+ fig, ax = stx.plt.subplots(figsize=figsize)
685
+
686
+ # Plot each fold based on temporal position
687
+ for fold, split_indices in enumerate(splits):
688
+ y_pos = fold
689
+
690
+ if len(split_indices) == 3: # train, val, test
691
+ train_idx, val_idx, test_idx = split_indices
692
+
693
+ # Find temporal positions of train indices
694
+ train_positions = []
695
+ for idx in train_idx:
696
+ temp_pos = np.where(time_order == idx)[0][
697
+ 0
698
+ ] # Find position in sorted order
699
+ train_positions.append(temp_pos)
700
+
701
+ # Plot train window based on temporal positions
702
+ if train_positions:
703
+ train_start = min(train_positions)
704
+ train_end = max(train_positions)
705
+ train_rect = patches.Rectangle(
706
+ (train_start, y_pos - 0.3),
707
+ train_end - train_start + 1,
708
+ 0.6,
709
+ linewidth=1,
710
+ edgecolor="blue",
711
+ facecolor="lightblue",
712
+ alpha=0.7,
713
+ label="Train" if fold == 0 else "",
714
+ )
715
+ ax.add_patch(train_rect)
716
+
717
+ # Find temporal positions of validation indices
718
+ if len(val_idx) > 0:
719
+ val_positions = []
720
+ for idx in val_idx:
721
+ temp_pos = np.where(time_order == idx)[0][0]
722
+ val_positions.append(temp_pos)
723
+
724
+ # Plot validation window
725
+ if val_positions:
726
+ val_start = min(val_positions)
727
+ val_end = max(val_positions)
728
+ val_rect = patches.Rectangle(
729
+ (val_start, y_pos - 0.3),
730
+ val_end - val_start + 1,
731
+ 0.6,
732
+ linewidth=1,
733
+ edgecolor="green",
734
+ facecolor="lightgreen",
735
+ alpha=0.7,
736
+ label="Validation" if fold == 0 else "",
737
+ )
738
+ ax.add_patch(val_rect)
739
+
740
+ # Find temporal positions of test indices
741
+ test_positions = []
742
+ for idx in test_idx:
743
+ temp_pos = np.where(time_order == idx)[0][
744
+ 0
745
+ ] # Find position in sorted order
746
+ test_positions.append(temp_pos)
747
+
748
+ # Plot test window based on temporal positions
749
+ if test_positions:
750
+ test_start = min(test_positions)
751
+ test_end = max(test_positions)
752
+ test_rect = patches.Rectangle(
753
+ (test_start, y_pos - 0.3),
754
+ test_end - test_start + 1,
755
+ 0.6,
756
+ linewidth=1,
757
+ edgecolor=COLORS["RGBA_NORM"]["red"],
758
+ facecolor=COLORS["RGBA_NORM"]["red"],
759
+ alpha=0.7,
760
+ label="Test" if fold == 0 else "",
761
+ )
762
+ ax.add_patch(test_rect)
763
+
764
+ else: # train, test (2-way split)
765
+ train_idx, test_idx = split_indices
766
+
767
+ # Find temporal positions of train indices
768
+ train_positions = []
769
+ for idx in train_idx:
770
+ temp_pos = np.where(time_order == idx)[0][
771
+ 0
772
+ ] # Find position in sorted order
773
+ train_positions.append(temp_pos)
774
+
775
+ # Plot train window based on temporal positions
776
+ if train_positions:
777
+ train_start = min(train_positions)
778
+ train_end = max(train_positions)
779
+ train_rect = patches.Rectangle(
780
+ (train_start, y_pos - 0.3),
781
+ train_end - train_start + 1,
782
+ 0.6,
783
+ linewidth=1,
784
+ edgecolor=COLORS["RGBA_NORM"]["light_blue"],
785
+ facecolor=COLORS["RGBA_NORM"]["light_blue"],
786
+ alpha=0.7,
787
+ label="Train" if fold == 0 else "",
788
+ )
789
+ ax.add_patch(train_rect)
790
+
791
+ # Find temporal positions of test indices
792
+ test_positions = []
793
+ for idx in test_idx:
794
+ temp_pos = np.where(time_order == idx)[0][
795
+ 0
796
+ ] # Find position in sorted order
797
+ test_positions.append(temp_pos)
798
+
799
+ # Plot test window based on temporal positions
800
+ if test_positions:
801
+ test_start = min(test_positions)
802
+ test_end = max(test_positions)
803
+ test_rect = patches.Rectangle(
804
+ (test_start, y_pos - 0.3),
805
+ test_end - test_start + 1,
806
+ 0.6,
807
+ linewidth=1,
808
+ edgecolor="red",
809
+ facecolor="lightcoral",
810
+ alpha=0.7,
811
+ label="Test" if fold == 0 else "",
812
+ )
813
+ ax.add_patch(test_rect)
814
+
815
+ # Add scatter plots of actual data points with jittering
816
+ np.random.seed(42) # For reproducible jittering
817
+ jitter_strength = 0.15 # Amount of vertical jittering
818
+
819
+ # First, plot dropped samples in gray if undersampling is enabled
820
+ if splits_no_undersample is not None:
821
+ for fold, split_indices_no_us in enumerate(splits_no_undersample):
822
+ y_pos = fold
823
+ split_indices_us = splits[fold]
824
+
825
+ if len(split_indices_no_us) == 3: # train, val, test
826
+ train_idx_no_us, val_idx_no_us, test_idx_no_us = (
827
+ split_indices_no_us
828
+ )
829
+ train_idx_us, val_idx_us, test_idx_us = split_indices_us
830
+
831
+ # Find dropped train samples
832
+ dropped_train = np.setdiff1d(train_idx_no_us, train_idx_us)
833
+ if len(dropped_train) > 0:
834
+ dropped_train_positions = [
835
+ np.where(time_order == idx)[0][0]
836
+ for idx in dropped_train
837
+ ]
838
+ dropped_train_jitter = np.random.normal(
839
+ 0, jitter_strength, len(dropped_train_positions)
840
+ )
841
+ ax.plot_scatter(
842
+ dropped_train_positions,
843
+ y_pos + dropped_train_jitter,
844
+ c="gray",
845
+ s=15,
846
+ alpha=0.3,
847
+ marker="x",
848
+ label="Dropped (train)" if fold == 0 else "",
849
+ zorder=2,
850
+ )
851
+
852
+ # Find dropped validation samples
853
+ dropped_val = np.setdiff1d(val_idx_no_us, val_idx_us)
854
+ if len(dropped_val) > 0:
855
+ dropped_val_positions = [
856
+ np.where(time_order == idx)[0][0]
857
+ for idx in dropped_val
858
+ ]
859
+ dropped_val_jitter = np.random.normal(
860
+ 0, jitter_strength, len(dropped_val_positions)
861
+ )
862
+ ax.plot_scatter(
863
+ dropped_val_positions,
864
+ y_pos + dropped_val_jitter,
865
+ c="gray",
866
+ s=15,
867
+ alpha=0.3,
868
+ marker="x",
869
+ label="Dropped (val)" if fold == 0 else "",
870
+ zorder=2,
871
+ )
872
+
873
+ else: # train, test (2-way split)
874
+ train_idx_no_us, test_idx_no_us = split_indices_no_us
875
+ train_idx_us, test_idx_us = split_indices_us
876
+
877
+ # Find dropped train samples
878
+ dropped_train = np.setdiff1d(train_idx_no_us, train_idx_us)
879
+ if len(dropped_train) > 0:
880
+ dropped_train_positions = [
881
+ np.where(time_order == idx)[0][0]
882
+ for idx in dropped_train
883
+ ]
884
+ dropped_train_jitter = np.random.normal(
885
+ 0, jitter_strength, len(dropped_train_positions)
886
+ )
887
+ ax.plot_scatter(
888
+ dropped_train_positions,
889
+ y_pos + dropped_train_jitter,
890
+ c="gray",
891
+ s=15,
892
+ alpha=0.3,
893
+ marker="x",
894
+ label="Dropped samples" if fold == 0 else "",
895
+ zorder=2,
896
+ )
897
+
898
+ # Then, plot kept samples in color
899
+ for fold, split_indices in enumerate(splits):
900
+ y_pos = fold
901
+
902
+ if len(split_indices) == 3: # train, val, test
903
+ train_idx, val_idx, test_idx = split_indices
904
+
905
+ # Find temporal positions for scatter plot
906
+ train_positions = []
907
+ for idx in train_idx:
908
+ temp_pos = np.where(time_order == idx)[0][0]
909
+ train_positions.append(temp_pos)
910
+
911
+ val_positions = []
912
+ if len(val_idx) > 0:
913
+ for idx in val_idx:
914
+ temp_pos = np.where(time_order == idx)[0][0]
915
+ val_positions.append(temp_pos)
916
+
917
+ test_positions = []
918
+ for idx in test_idx:
919
+ temp_pos = np.where(time_order == idx)[0][0]
920
+ test_positions.append(temp_pos)
921
+
922
+ # Add jittered scatter plots for 3-way split
923
+ if train_positions:
924
+ train_jitter = np.random.normal(
925
+ 0, jitter_strength, len(train_positions)
926
+ )
927
+ # Color by class if y is provided
928
+ if y is not None:
929
+ train_colors = [
930
+ stx.plt.color.PARAMS["RGBA_NORM"]["blue"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["light_blue"]
931
+ for idx in train_idx
932
+ ]
933
+ ax.plot_scatter(
934
+ train_positions,
935
+ y_pos + train_jitter,
936
+ c=train_colors,
937
+ s=20,
938
+ alpha=0.7,
939
+ marker="o",
940
+ label="Train (class 0)" if fold == 0 else "",
941
+ zorder=3,
942
+ )
943
+ else:
944
+ ax.plot_scatter(
945
+ train_positions,
946
+ y_pos + train_jitter,
947
+ c="darkblue",
948
+ s=20,
949
+ alpha=0.7,
950
+ marker="o",
951
+ label="Train points" if fold == 0 else "",
952
+ zorder=3,
953
+ )
954
+
955
+ if val_positions:
956
+ val_jitter = np.random.normal(
957
+ 0, jitter_strength, len(val_positions)
958
+ )
959
+ # Color by class if y is provided
960
+ if y is not None:
961
+ val_colors = [
962
+ stx.plt.color.PARAMS["RGBA_NORM"]["yellow"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["orange"]
963
+ for idx in val_idx
964
+ ]
965
+ ax.plot_scatter(
966
+ val_positions,
967
+ y_pos + val_jitter,
968
+ c=val_colors,
969
+ s=20,
970
+ alpha=0.7,
971
+ marker="^",
972
+ label="Val (class 0)" if fold == 0 else "",
973
+ zorder=3,
974
+ )
975
+ else:
976
+ ax.plot_scatter(
977
+ val_positions,
978
+ y_pos + val_jitter,
979
+ c="darkgreen",
980
+ s=20,
981
+ alpha=0.7,
982
+ marker="^",
983
+ label="Val points" if fold == 0 else "",
984
+ zorder=3,
985
+ )
986
+
987
+ if test_positions:
988
+ test_jitter = np.random.normal(
989
+ 0, jitter_strength, len(test_positions)
990
+ )
991
+ # Color by class if y is provided
992
+ if y is not None:
993
+ test_colors = [
994
+ stx.plt.color.PARAMS["RGBA_NORM"]["red"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["brown"]
995
+ for idx in test_idx
996
+ ]
997
+ ax.plot_scatter(
998
+ test_positions,
999
+ y_pos + test_jitter,
1000
+ c=test_colors,
1001
+ s=20,
1002
+ alpha=0.7,
1003
+ marker="s",
1004
+ label="Test (class 0)" if fold == 0 else "",
1005
+ zorder=3,
1006
+ )
1007
+ else:
1008
+ ax.plot_scatter(
1009
+ test_positions,
1010
+ y_pos + test_jitter,
1011
+ c="darkred",
1012
+ s=20,
1013
+ alpha=0.7,
1014
+ marker="s",
1015
+ label="Test points" if fold == 0 else "",
1016
+ zorder=3,
1017
+ )
1018
+
1019
+ else: # train, test (2-way split)
1020
+ train_idx, test_idx = split_indices
1021
+
1022
+ # Get actual timestamps for train and test indices
1023
+ train_times = (
1024
+ timestamps[train_idx]
1025
+ if timestamps is not None
1026
+ else train_idx
1027
+ )
1028
+ test_times = (
1029
+ timestamps[test_idx]
1030
+ if timestamps is not None
1031
+ else test_idx
1032
+ )
1033
+
1034
+ # Find temporal positions for scatter plot
1035
+ train_positions = []
1036
+ for idx in train_idx:
1037
+ temp_pos = np.where(time_order == idx)[0][0]
1038
+ train_positions.append(temp_pos)
1039
+
1040
+ test_positions = []
1041
+ for idx in test_idx:
1042
+ temp_pos = np.where(time_order == idx)[0][0]
1043
+ test_positions.append(temp_pos)
1044
+
1045
+ # Add jittered scatter plots for 2-way split
1046
+ if train_positions:
1047
+ train_jitter = np.random.normal(
1048
+ 0, jitter_strength, len(train_positions)
1049
+ )
1050
+ # Color by class if y is provided
1051
+ if y is not None:
1052
+ train_colors = [
1053
+ stx.plt.color.PARAMS["RGBA_NORM"]["blue"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["light_blue"]
1054
+ for idx in train_idx
1055
+ ]
1056
+ ax.plot_scatter(
1057
+ train_positions,
1058
+ y_pos + train_jitter,
1059
+ c=train_colors,
1060
+ s=20,
1061
+ alpha=0.7,
1062
+ marker="o",
1063
+ label="Train (class 0)" if fold == 0 else "",
1064
+ zorder=3,
1065
+ )
1066
+ else:
1067
+ ax.plot_scatter(
1068
+ train_positions,
1069
+ y_pos + train_jitter,
1070
+ c="darkblue",
1071
+ s=20,
1072
+ alpha=0.7,
1073
+ marker="o",
1074
+ label="Train points" if fold == 0 else "",
1075
+ zorder=3,
1076
+ )
1077
+
1078
+ if test_positions:
1079
+ test_jitter = np.random.normal(
1080
+ 0, jitter_strength, len(test_positions)
1081
+ )
1082
+ # Color by class if y is provided
1083
+ if y is not None:
1084
+ test_colors = [
1085
+ stx.plt.color.PARAMS["RGBA_NORM"]["red"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["brown"]
1086
+ for idx in test_idx
1087
+ ]
1088
+ ax.plot_scatter(
1089
+ test_positions,
1090
+ y_pos + test_jitter,
1091
+ c=test_colors,
1092
+ s=20,
1093
+ alpha=0.7,
1094
+ marker="s",
1095
+ label="Test (class 0)" if fold == 0 else "",
1096
+ zorder=3,
1097
+ )
1098
+ else:
1099
+ ax.plot_scatter(
1100
+ test_positions,
1101
+ y_pos + test_jitter,
1102
+ c="darkred",
1103
+ s=20,
1104
+ alpha=0.7,
1105
+ marker="s",
1106
+ label="Test points" if fold == 0 else "",
1107
+ zorder=3,
1108
+ )
1109
+
1110
+ # Format plot
1111
+ ax.set_ylim(-0.5, len(splits) - 0.5)
1112
+ ax.set_xlim(0, len(X))
1113
+ ax.set_xlabel("Temporal Position (sorted by timestamp)")
1114
+ ax.set_ylabel("Fold")
1115
+ gap_text = f", Gap: {self.gap}" if self.gap > 0 else ""
1116
+ val_text = (
1117
+ f", Val ratio: {self.val_ratio:.1%}" if self.val_ratio > 0 else ""
1118
+ )
1119
+ ax.set_title(
1120
+ f"Sliding Window Split Visualization ({split_type})\\n"
1121
+ f"Window: {self.window_size}, Step: {self.step_size}, Test: {self.test_size}{gap_text}{val_text}\\n"
1122
+ f"Rectangles show windows, dots show actual data points"
1123
+ )
1124
+
1125
+ # Set y-ticks
1126
+ ax.set_yticks(range(len(splits)))
1127
+ ax.set_yticklabels([f"Fold {i}" for i in range(len(splits))])
1128
+
1129
+ # Add enhanced legend with class and sample information
1130
+ if y is not None:
1131
+ # Count samples per class in total dataset
1132
+ unique_classes, class_counts = np.unique(y, return_counts=True)
1133
+ total_class_info = ", ".join(
1134
+ [
1135
+ f"Class {cls}: n={count}"
1136
+ for cls, count in zip(unique_classes, class_counts)
1137
+ ]
1138
+ )
1139
+
1140
+ # Count samples in first fold to show per-fold distribution
1141
+ first_split = splits[0]
1142
+ if len(first_split) == 3: # train, val, test
1143
+ train_idx, val_idx, test_idx = first_split
1144
+ fold_info = f"Fold 0: Train n={len(train_idx)}, Val n={len(val_idx)}, Test n={len(test_idx)}"
1145
+ else: # train, test
1146
+ train_idx, test_idx = first_split
1147
+ fold_info = (
1148
+ f"Fold 0: Train n={len(train_idx)}, Test n={len(test_idx)}"
1149
+ )
1150
+
1151
+ # Add legend with class information
1152
+ handles, labels = ax.get_legend_handles_labels()
1153
+ # Add title to legend showing class distribution
1154
+ legend_title = f"Total: {total_class_info}\\n{fold_info}"
1155
+ ax.legend(handles, labels, loc="upper right", title=legend_title)
1156
+ else:
1157
+ ax.legend(loc="upper right")
1158
+
1159
+ plt.tight_layout()
1160
+
1161
+ if save_path:
1162
+ fig.savefig(save_path, dpi=150, bbox_inches="tight")
1163
+
1164
+ return fig
1165
+
1166
+
1167
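A minimal usage sketch for the visualization helper; the output filename is illustrative:

```python
splitter = TimeSeriesSlidingWindowSplit(
    window_size=50, test_size=20, gap=5, val_ratio=0.2,
)
fig = splitter.plot_splits(X, y, timestamps, save_path="./sliding_window_splits.jpg")
# Rectangles mark the per-fold train/val/test windows, jittered dots are the actual
# samples (colored by class when y is given), and gray crosses mark samples dropped
# by undersampling when it is enabled.
```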
+ """Functions & Classes"""
1168
+ def main(args) -> int:
1169
+ """Demonstrate TimeSeriesSlidingWindowSplit functionality.
1170
+
1171
+ Args:
1172
+ args: Command line arguments
1173
+
1174
+ Returns:
1175
+ int: Exit status
1176
+ """
1177
+
1178
+ def demo_01_fixed_window_non_overlapping_tests(X, y, timestamps):
1179
+ """Demo 1: Fixed window size with non-overlapping test sets (DEFAULT).
1180
+
1181
+ Best for: Testing model on consistent recent history.
1182
+ Each sample tested exactly once (like K-fold for time series).
1183
+ """
1184
+ logger.info("=" * 70)
1185
+ logger.info("DEMO 1: Fixed Window + Non-overlapping Tests (DEFAULT)")
1186
+ logger.info("=" * 70)
1187
+ logger.info("Best for: Testing model on consistent recent history")
1188
+
1189
+ splitter = TimeSeriesSlidingWindowSplit(
1190
+ window_size=args.window_size,
1191
+ test_size=args.test_size,
1192
+ gap=args.gap,
1193
+ overlapping_tests=False, # Default
1194
+ expanding_window=False, # Default
1195
+ )
1196
+
1197
+ splits = list(splitter.split(X, y, timestamps))[:5]
1198
+ logger.info(f"Generated {len(splits)} splits")
1199
+
1200
+ for fold, (train_idx, test_idx) in enumerate(splits):
1201
+ logger.info(
1202
+ f" Fold {fold}: Train={len(train_idx)} (fixed), Test={len(test_idx)}"
1203
+ )
1204
+
1205
+ fig = splitter.plot_splits(X, y, timestamps)
1206
+ stx.io.save(
1207
+ fig, "./01_sliding_window_fixed.jpg", symlink_from_cwd=True
1208
+ )
1209
+ logger.info("")
1210
+
1211
+ return splits
1212
+
1213
+ def demo_02_expanding_window_non_overlapping_tests(X, y, timestamps):
1214
+ """Demo 2: Expanding window with non-overlapping test sets.
1215
+
1216
+ Best for: Using all available past data (like sklearn TimeSeriesSplit).
1217
+ Training set grows to include all historical data.
1218
+ """
1219
+ logger.info("=" * 70)
1220
+ logger.info("DEMO 2: Expanding Window + Non-overlapping Tests")
1221
+ logger.info("=" * 70)
1222
+ logger.info(
1223
+ "Best for: Using all available past data (like sklearn TimeSeriesSplit)"
1224
+ )
1225
+
1226
+ splitter = TimeSeriesSlidingWindowSplit(
1227
+ window_size=args.window_size,
1228
+ test_size=args.test_size,
1229
+ gap=args.gap,
1230
+ overlapping_tests=False,
1231
+ expanding_window=True, # Use all past data!
1232
+ )
1233
+
1234
+ splits = list(splitter.split(X, y, timestamps))[:5]
1235
+ logger.info(f"Generated {len(splits)} splits")
1236
+
1237
+ for fold, (train_idx, test_idx) in enumerate(splits):
1238
+ logger.info(
1239
+ f" Fold {fold}: Train={len(train_idx)} (growing!), Test={len(test_idx)}"
1240
+ )
1241
+
1242
+ fig = splitter.plot_splits(X, y, timestamps)
1243
+ stx.io.save(
1244
+ fig, "./02_sliding_window_expanding.jpg", symlink_from_cwd=True
1245
+ )
1246
+ logger.info("")
1247
+
1248
+ return splits
1249
+
1250
+ def demo_03_fixed_window_overlapping_tests(X, y, timestamps):
1251
+ """Demo 3: Fixed window with overlapping test sets.
1252
+
1253
+ Best for: Maximum evaluation points (like K-fold training reuse).
1254
+ Test sets can overlap for more frequent model evaluation.
1255
+ """
1256
+ logger.info("=" * 70)
1257
+ logger.info("DEMO 3: Fixed Window + Overlapping Tests")
1258
+ logger.info("=" * 70)
1259
+ logger.info(
1260
+ "Best for: Maximum evaluation points (like K-fold for training)"
1261
+ )
1262
+
1263
+ splitter = TimeSeriesSlidingWindowSplit(
1264
+ window_size=args.window_size,
1265
+ test_size=args.test_size,
1266
+ gap=args.gap,
1267
+ overlapping_tests=True, # Allow test overlap
1268
+ expanding_window=False,
1269
+ # step_size will default to test_size // 2 for 50% overlap
1270
+ )
1271
+
1272
+ splits = list(splitter.split(X, y, timestamps))[:5]
1273
+ logger.info(f"Generated {len(splits)} splits")
1274
+
1275
+ for fold, (train_idx, test_idx) in enumerate(splits):
1276
+ logger.info(
1277
+ f" Fold {fold}: Train={len(train_idx)}, Test={len(test_idx)}"
1278
+ )
1279
+
1280
+ fig = splitter.plot_splits(X, y, timestamps)
1281
+ stx.io.save(
1282
+ fig, "./03_sliding_window_overlapping.jpg", symlink_from_cwd=True
1283
+ )
1284
+ logger.info("")
1285
+
1286
+ return splits
1287
+
1288
+ def demo_04_undersample_imbalanced_data(X, y_imbalanced, timestamps):
1289
+ """Demo 4: Undersampling for imbalanced time series data.
1290
+
1291
+ Best for: Handling class imbalance in training sets.
1292
+        Balances classes by randomly undersampling the majority class.
1293
+ """
1294
+ logger.info("=" * 70)
1295
+ logger.info("DEMO 4: Undersampling for Imbalanced Data")
1296
+ logger.info("=" * 70)
1297
+ logger.info("Best for: Handling class imbalance in time series")
1298
+
1299
+ # Show data imbalance
1300
+ unique, counts = np.unique(y_imbalanced, return_counts=True)
1301
+ logger.info(f"Class distribution: {dict(zip(unique, counts))}")
1302
+ logger.info("")
1303
+
1304
+ # Without undersampling
1305
+ splitter_no_undersample = TimeSeriesSlidingWindowSplit(
1306
+ window_size=args.window_size,
1307
+ test_size=args.test_size,
1308
+ gap=args.gap,
1309
+ undersample=False,
1310
+ )
1311
+
1312
+ splits_no_us = list(
1313
+ splitter_no_undersample.split(X, y_imbalanced, timestamps)
1314
+ )[:3]
1315
+ logger.info(f"WITHOUT undersampling: {len(splits_no_us)} splits")
1316
+ for fold, (train_idx, test_idx) in enumerate(splits_no_us):
1317
+ train_labels = y_imbalanced[train_idx]
1318
+ train_unique, train_counts = np.unique(
1319
+ train_labels, return_counts=True
1320
+ )
1321
+ logger.info(
1322
+ f" Fold {fold}: Train size={len(train_idx)}, "
1323
+ f"Class dist={dict(zip(train_unique, train_counts))}"
1324
+ )
1325
+ logger.info("")
1326
+
1327
+ # With undersampling
1328
+ splitter_undersample = TimeSeriesSlidingWindowSplit(
1329
+ window_size=args.window_size,
1330
+ test_size=args.test_size,
1331
+ gap=args.gap,
1332
+ undersample=True, # Enable undersampling!
1333
+ random_state=42,
1334
+ )
1335
+
1336
+ splits_us = list(
1337
+ splitter_undersample.split(X, y_imbalanced, timestamps)
1338
+ )[:3]
1339
+ logger.info(f"WITH undersampling: {len(splits_us)} splits")
1340
+ for fold, (train_idx, test_idx) in enumerate(splits_us):
1341
+ train_labels = y_imbalanced[train_idx]
1342
+ train_unique, train_counts = np.unique(
1343
+ train_labels, return_counts=True
1344
+ )
1345
+ logger.info(
1346
+ f" Fold {fold}: Train size={len(train_idx)} (balanced!), "
1347
+ f"Class dist={dict(zip(train_unique, train_counts))}"
1348
+ )
1349
+
1350
+ # Save visualization for undersampling
1351
+ fig = splitter_undersample.plot_splits(X, y_imbalanced, timestamps)
1352
+ stx.io.save(
1353
+ fig, "./04_sliding_window_undersample.jpg", symlink_from_cwd=True
1354
+ )
1355
+ logger.info("")
1356
+
1357
+ return splits_us
1358
+
1359
+ def demo_05_validation_dataset(X, y, timestamps):
1360
+ """Demo 5: Using validation dataset with train-val-test splits.
1361
+
1362
+ Best for: Model selection and hyperparameter tuning.
1363
+ Creates train/validation/test splits maintaining temporal order.
1364
+ """
1365
+ logger.info("=" * 70)
1366
+ logger.info("DEMO 5: Validation Dataset (Train-Val-Test Splits)")
1367
+ logger.info("=" * 70)
1368
+ logger.info("Best for: Model selection and hyperparameter tuning")
1369
+
1370
+ splitter = TimeSeriesSlidingWindowSplit(
1371
+ window_size=args.window_size,
1372
+ test_size=args.test_size,
1373
+ gap=args.gap,
1374
+ val_ratio=0.2, # 20% of training window for validation
1375
+ overlapping_tests=False,
1376
+ expanding_window=False,
1377
+ )
1378
+
1379
+ splits = list(splitter.split_with_val(X, y, timestamps))[:3]
1380
+ logger.info(f"Generated {len(splits)} splits")
1381
+
1382
+ for fold, (train_idx, val_idx, test_idx) in enumerate(splits):
1383
+ logger.info(
1384
+ f" Fold {fold}: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}"
1385
+ )
1386
+
1387
+ fig = splitter.plot_splits(X, y, timestamps)
1388
+ stx.io.save(
1389
+ fig, "./05_sliding_window_validation.jpg", symlink_from_cwd=True
1390
+ )
1391
+ logger.info("")
1392
+
1393
+ return splits
1394
+
1395
+ def demo_06_expanding_with_validation(X, y, timestamps):
1396
+ """Demo 6: Expanding window with validation dataset.
1397
+
1398
+ Best for: Using all historical data with model selection.
1399
+ Combines expanding window and validation split.
1400
+ """
1401
+ logger.info("=" * 70)
1402
+ logger.info("DEMO 6: Expanding Window + Validation Dataset")
1403
+ logger.info("=" * 70)
1404
+ logger.info("Best for: Using all historical data with model selection")
1405
+
1406
+ splitter = TimeSeriesSlidingWindowSplit(
1407
+ window_size=args.window_size,
1408
+ test_size=args.test_size,
1409
+ gap=args.gap,
1410
+ val_ratio=0.2,
1411
+ overlapping_tests=False,
1412
+ expanding_window=True, # Expanding + validation!
1413
+ )
1414
+
1415
+ splits = list(splitter.split_with_val(X, y, timestamps))[:3]
1416
+ logger.info(f"Generated {len(splits)} splits")
1417
+
1418
+ for fold, (train_idx, val_idx, test_idx) in enumerate(splits):
1419
+ logger.info(
1420
+ f" Fold {fold}: Train={len(train_idx)} (growing!), Val={len(val_idx)}, Test={len(test_idx)}"
1421
+ )
1422
+
1423
+ fig = splitter.plot_splits(X, y, timestamps)
1424
+ stx.io.save(
1425
+ fig,
1426
+ "./06_sliding_window_expanding_validation.jpg",
1427
+ symlink_from_cwd=True,
1428
+ )
1429
+ logger.info("")
1430
+
1431
+ return splits
1432
+
1433
+ def demo_07_undersample_with_validation(X, y_imbalanced, timestamps):
1434
+ """Demo 7: Undersampling with validation dataset.
1435
+
1436
+ Best for: Handling imbalanced data with hyperparameter tuning.
1437
+ Combines undersampling and validation split.
1438
+ """
1439
+
1440
+ logger.info("=" * 70)
1441
+ logger.info("DEMO 7: Undersampling + Validation Dataset")
1442
+ logger.info("=" * 70)
1443
+ logger.info("Best for: Imbalanced data with hyperparameter tuning")
1444
+
1445
+ splitter = TimeSeriesSlidingWindowSplit(
1446
+ window_size=args.window_size,
1447
+ test_size=args.test_size,
1448
+ gap=args.gap,
1449
+ val_ratio=0.2,
1450
+ undersample=True, # Undersample + validation!
1451
+ random_state=42,
1452
+ )
1453
+
1454
+ splits = list(splitter.split_with_val(X, y_imbalanced, timestamps))[:3]
1455
+ logger.info(f"Generated {len(splits)} splits")
1456
+
1457
+ for fold, (train_idx, val_idx, test_idx) in enumerate(splits):
1458
+ train_labels = y_imbalanced[train_idx]
1459
+ train_unique, train_counts = np.unique(
1460
+ train_labels, return_counts=True
1461
+ )
1462
+ logger.info(
1463
+ f" Fold {fold}: Train={len(train_idx)} (balanced!), Val={len(val_idx)}, Test={len(test_idx)}, "
1464
+ f"Class dist={dict(zip(train_unique, train_counts))}"
1465
+ )
1466
+
1467
+ fig = splitter.plot_splits(X, y_imbalanced, timestamps)
1468
+ stx.io.save(
1469
+ fig,
1470
+ "./07_sliding_window_undersample_validation.jpg",
1471
+ symlink_from_cwd=True,
1472
+ )
1473
+ logger.info("")
1474
+
1475
+ return splits
1476
+
1477
+ def demo_08_all_options_combined(X, y_imbalanced, timestamps):
1478
+ """Demo 8: All options combined.
1479
+
1480
+        Best for: Maximum flexibility with expanding window, undersampling, and validation.
1481
+ Shows all features working together.
1482
+ """
1483
+ logger.info("=" * 70)
1484
+ logger.info(
1485
+ "DEMO 8: Expanding + Undersampling + Validation (ALL OPTIONS)"
1486
+ )
1487
+ logger.info("=" * 70)
1488
+ logger.info("Best for: Comprehensive time series CV with all features")
1489
+
1490
+ splitter = TimeSeriesSlidingWindowSplit(
1491
+ window_size=args.window_size,
1492
+ test_size=args.test_size,
1493
+ gap=args.gap,
1494
+ val_ratio=0.2,
1495
+ overlapping_tests=False,
1496
+ expanding_window=True, # All three!
1497
+ undersample=True,
1498
+ random_state=42,
1499
+ )
1500
+
1501
+ splits = list(splitter.split_with_val(X, y_imbalanced, timestamps))[:3]
1502
+ logger.info(f"Generated {len(splits)} splits")
1503
+
1504
+ for fold, (train_idx, val_idx, test_idx) in enumerate(splits):
1505
+ train_labels = y_imbalanced[train_idx]
1506
+ train_unique, train_counts = np.unique(
1507
+ train_labels, return_counts=True
1508
+ )
1509
+ logger.info(
1510
+ f" Fold {fold}: Train={len(train_idx)} (growing & balanced!), Val={len(val_idx)}, Test={len(test_idx)}, "
1511
+ f"Class dist={dict(zip(train_unique, train_counts))}"
1512
+ )
1513
+
1514
+ fig = splitter.plot_splits(X, y_imbalanced, timestamps)
1515
+ stx.io.save(
1516
+ fig, "./08_sliding_window_all_options.jpg", symlink_from_cwd=True
1517
+ )
1518
+ logger.info("")
1519
+
1520
+ return splits
1521
+
1522
+ def print_summary(
1523
+ splits_fixed,
1524
+ splits_expanding,
1525
+ splits_overlap,
1526
+ splits_undersample=None,
1527
+ splits_validation=None,
1528
+ splits_expanding_val=None,
1529
+ splits_undersample_val=None,
1530
+ splits_all_options=None,
1531
+ ):
1532
+ """Print comparison summary of all modes."""
1533
+ logger.info("=" * 70)
1534
+ logger.info("SUMMARY COMPARISON")
1535
+ logger.info("=" * 70)
1536
+ logger.info(
1537
+ f"01. Fixed window (non-overlap): {len(splits_fixed)} folds, train size constant"
1538
+ )
1539
+ logger.info(
1540
+ f"02. Expanding window (non-overlap): {len(splits_expanding)} folds, train size grows"
1541
+ )
1542
+ logger.info(
1543
+ f"03. Fixed window (overlapping): {len(splits_overlap)} folds, more eval points"
1544
+ )
1545
+ if splits_undersample is not None:
1546
+ logger.info(
1547
+ f"04. With undersampling: {len(splits_undersample)} folds, balanced classes"
1548
+ )
1549
+ if splits_validation is not None:
1550
+ logger.info(
1551
+ f"05. With validation set: {len(splits_validation)} folds, train-val-test"
1552
+ )
1553
+ if splits_expanding_val is not None:
1554
+ logger.info(
1555
+ f"06. Expanding + validation: {len(splits_expanding_val)} folds, growing train with val"
1556
+ )
1557
+ if splits_undersample_val is not None:
1558
+ logger.info(
1559
+ f"07. Undersample + validation: {len(splits_undersample_val)} folds, balanced with val"
1560
+ )
1561
+ if splits_all_options is not None:
1562
+ logger.info(
1563
+ f"08. All options combined: {len(splits_all_options)} folds, expanding + balanced + val"
1564
+ )
1565
+ logger.info("")
1566
+ logger.info("Key Insights:")
1567
+ logger.info(
1568
+ " - Non-overlapping tests (default): Each sample tested exactly once"
1569
+ )
1570
+ logger.info(
1571
+ " - Expanding window: Maximizes training data, like sklearn TimeSeriesSplit"
1572
+ )
1573
+ logger.info(
1574
+ " - Overlapping tests: More evaluation points, like K-fold training reuse"
1575
+ )
1576
+ if splits_undersample is not None:
1577
+ logger.info(
1578
+ " - Undersampling: Balances imbalanced classes in training sets"
1579
+ )
1580
+ if splits_validation is not None:
1581
+ logger.info(
1582
+ " - Validation set: Enables hyperparameter tuning with temporal order"
1583
+ )
1584
+ if splits_all_options is not None:
1585
+ logger.info(
1586
+ " - Combined options: Maximum flexibility for complex time series CV"
1587
+ )
1588
+ logger.info("=" * 70)
1589
+
1590
+ # Main execution
1591
+ logger.info("=" * 70)
1592
+ logger.info("Demonstrating TimeSeriesSlidingWindowSplit with New Options")
1593
+ logger.info("=" * 70)
1594
+
1595
+ # Generate test data
1596
+ np.random.seed(42)
1597
+ n_samples = args.n_samples
1598
+ X = np.random.randn(n_samples, 5)
1599
+ y = np.random.randint(0, 2, n_samples) # Balanced
1600
+ timestamps = np.arange(n_samples) + np.random.normal(0, 0.1, n_samples)
1601
+
1602
+ # Create imbalanced labels (80% class 0, 20% class 1)
1603
+ y_imbalanced = np.zeros(n_samples, dtype=int)
1604
+ n_minority = int(n_samples * 0.2)
1605
+ minority_indices = np.random.choice(
1606
+ n_samples, size=n_minority, replace=False
1607
+ )
1608
+ y_imbalanced[minority_indices] = 1
1609
+
1610
+ logger.info(
1611
+ f"Generated test data: {n_samples} samples, {X.shape[1]} features"
1612
+ )
1613
+ logger.info("")
1614
+
1615
+ # Run demos
1616
+ splits_fixed = demo_01_fixed_window_non_overlapping_tests(X, y, timestamps)
1617
+ splits_expanding = demo_02_expanding_window_non_overlapping_tests(
1618
+ X, y, timestamps
1619
+ )
1620
+ splits_overlap = demo_03_fixed_window_overlapping_tests(X, y, timestamps)
1621
+ splits_undersample = demo_04_undersample_imbalanced_data(
1622
+ X, y_imbalanced, timestamps
1623
+ )
1624
+ splits_validation = demo_05_validation_dataset(X, y, timestamps)
1625
+ splits_expanding_val = demo_06_expanding_with_validation(X, y, timestamps)
1626
+ splits_undersample_val = demo_07_undersample_with_validation(
1627
+ X, y_imbalanced, timestamps
1628
+ )
1629
+ splits_all_options = demo_08_all_options_combined(
1630
+ X, y_imbalanced, timestamps
1631
+ )
1632
+
1633
+ # Print summary
1634
+ print_summary(
1635
+ splits_fixed,
1636
+ splits_expanding,
1637
+ splits_overlap,
1638
+ splits_undersample,
1639
+ splits_validation,
1640
+ splits_expanding_val,
1641
+ splits_undersample_val,
1642
+ splits_all_options,
1643
+ )
1644
+
1645
+ return 0
1646
+
1647
+
1648
+ def parse_args() -> argparse.Namespace:
1649
+ """Parse command line arguments."""
1650
+ parser = argparse.ArgumentParser(
1651
+ description="Demonstrate TimeSeriesSlidingWindowSplit with overlapping_tests and expanding_window options"
1652
+ )
1653
+ parser.add_argument(
1654
+ "--n-samples",
1655
+ type=int,
1656
+ default=200,
1657
+ help="Number of samples to generate (default: %(default)s)",
1658
+ )
1659
+ parser.add_argument(
1660
+ "--window-size",
1661
+ type=int,
1662
+ default=50,
1663
+ help="Size of training window (default: %(default)s)",
1664
+ )
1665
+ parser.add_argument(
1666
+ "--test-size",
1667
+ type=int,
1668
+ default=20,
1669
+ help="Size of test window (default: %(default)s)",
1670
+ )
1671
+ parser.add_argument(
1672
+ "--gap",
1673
+ type=int,
1674
+ default=5,
1675
+ help="Gap between train and test (default: %(default)s)",
1676
+ )
1677
+ args = parser.parse_args()
1678
+ return args
1679
+
1680
+
1681
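For reference, the demo accepts the arguments defined above and can be run directly; a typical invocation (the module filename is assumed from the package path) would be `python _TimeSeriesSlidingWindowSplit.py --n-samples 200 --window-size 50 --test-size 20 --gap 5`.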
+ def run_main() -> None:
1682
+ """Initialize scitex framework, run main function, and cleanup."""
1683
+ global CONFIG, CC, sys, plt, rng
1684
+
1685
+ import sys
1686
+
1687
+ import matplotlib.pyplot as plt
1688
+ import scitex as stx
1689
+
1690
+ args = parse_args()
1691
+
1692
+ CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
1693
+ sys,
1694
+ plt,
1695
+ args=args,
1696
+ file=__FILE__,
1697
+ sdir_suffix=None,
1698
+ verbose=False,
1699
+ agg=True,
1700
+ )
1701
+
1702
+ exit_status = main(args)
1703
+
1704
+ stx.session.close(
1705
+ CONFIG,
1706
+ verbose=False,
1707
+ notify=False,
1708
+ message="",
1709
+ exit_status=exit_status,
1710
+ )
1711
+
1712
+
1713
+ if __name__ == "__main__":
1714
+ run_main()
1715
+
1716
+ # EOF