scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (704):
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1685 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-03 03:22:45 (ywatanabe)"
4
+ # File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ """
15
+ Functionalities:
16
+ - Implements sliding window cross-validation for time series
17
+ - Creates overlapping train/test windows that slide through time
18
+ - Supports temporal gaps between train and test sets
19
+ - Provides visualization with scatter plots showing actual data points
20
+ - Validates temporal order in all windows
21
+ - Ensures no data leakage between train and test sets
22
+
23
+ Dependencies:
24
+ - packages:
25
+ - numpy
26
+ - sklearn
27
+ - matplotlib
28
+ - scitex
29
+
30
+ IO:
31
+ - input-files:
32
+ - None (generates synthetic data for demonstration)
33
+ - output-files:
34
+ - ./sliding_window_demo.png (visualization with scatter plots)
35
+ """
36
+
37
+ """Imports"""
38
+ import argparse
39
+ from typing import Iterator, Optional, Tuple
40
+
41
+ import matplotlib.patches as patches
42
+ import matplotlib.pyplot as plt
43
+ import numpy as np
44
+ import scitex as stx
45
+ from scitex import logging
46
+ from sklearn.model_selection import BaseCrossValidator
47
+ from sklearn.utils.validation import _num_samples
48
+
49
+ logger = logging.getLogger(__name__)
50
+
51
+ COLORS = stx.plt.color.PARAMS
52
+ COLORS["RGBA_NORM"]
53
+
54
+
55
+ class TimeSeriesSlidingWindowSplit(BaseCrossValidator):
56
+ """
57
+ Sliding window cross-validation for time series.
58
+
59
+ Creates train/test windows that slide through time with configurable behavior.
60
+
61
+ Parameters
62
+ ----------
63
+ window_size : int
64
+ Size of training window (ignored if expanding_window=True)
65
+ step_size : int
66
+ Step between windows (overridden if overlapping_tests=False)
67
+ test_size : int
68
+ Size of test window
69
+ gap : int, default=0
70
+ Number of samples to skip between train and test windows
71
+ val_ratio : float, default=0.0
72
+ Ratio of validation set from training window
73
+ random_state : int, optional
74
+ Random seed for reproducibility
75
+ overlapping_tests : bool, default=False
76
+ If False, automatically sets step_size=test_size to ensure each sample
77
+ is tested exactly once (like K-fold for time series)
78
+ expanding_window : bool, default=False
79
+ If True, training window grows to include all past data (like sklearn's
80
+ TimeSeriesSplit). If False, uses fixed sliding window of size window_size.
81
+ undersample : bool, default=False
82
+ If True, balance classes in training sets by randomly undersampling
83
+ the majority class to match the minority class count. Temporal order
84
+ is maintained. Requires y labels in split().
85
+
86
+ Examples
87
+ --------
88
+ >>> from scitex.ml.classification import TimeSeriesSlidingWindowSplit
89
+ >>> import numpy as np
90
+ >>>
91
+ >>> X = np.random.randn(100, 10)
92
+ >>> y = np.random.randint(0, 2, 100)
93
+ >>> timestamps = np.arange(100)
94
+ >>>
95
+ >>> # Fixed window, non-overlapping tests (default)
96
+ >>> swcv = TimeSeriesSlidingWindowSplit(window_size=50, test_size=10, gap=5)
97
+ >>> for train_idx, test_idx in swcv.split(X, y, timestamps):
98
+ ... print(f"Train: {len(train_idx)}, Test: {len(test_idx)}")
99
+ >>>
100
+ >>> # Expanding window (use all past data)
101
+ >>> swcv = TimeSeriesSlidingWindowSplit(
102
+ ... window_size=50, test_size=10, gap=5, expanding_window=True
103
+ ... )
104
+ >>> for train_idx, test_idx in swcv.split(X, y, timestamps):
105
+ ... print(f"Train: {len(train_idx)}, Test: {len(test_idx)}") # Train grows!
106
+ """
107
+
108
+ def __init__(
109
+ self,
110
+ window_size: int,
111
+ step_size: Optional[int] = None,
112
+ test_size: int = 10,
113
+ gap: int = 0,
114
+ val_ratio: float = 0.0,
115
+ random_state: Optional[int] = None,
116
+ overlapping_tests: bool = False,
117
+ expanding_window: bool = False,
118
+ undersample: bool = False,
119
+ ):
120
+ self.window_size = window_size
121
+ self.test_size = test_size
122
+ self.gap = gap
123
+ self.val_ratio = val_ratio
124
+ self.random_state = random_state
125
+ self.rng = np.random.default_rng(random_state)
126
+ self.overlapping_tests = overlapping_tests
127
+ self.expanding_window = expanding_window
128
+ self.undersample = undersample
129
+ self.n_splits_mode = False
130
+ self._n_splits = None
131
+
132
+ # Handle step_size logic
133
+ if not overlapping_tests:
134
+ # overlapping_tests=False: ensure non-overlapping tests
135
+ if step_size is not None and step_size < test_size:
136
+ logger.warning(
137
+ f"overlapping_tests=False but step_size={step_size} < test_size={test_size}. "
138
+ f"This would cause test overlap. Setting step_size=test_size={test_size}."
139
+ )
140
+ self.step_size = test_size
141
+ elif step_size is None:
142
+ # Default: non-overlapping tests
143
+ self.step_size = test_size
144
+ logger.info(
145
+ f"step_size not specified with overlapping_tests=False. "
146
+ f"Using step_size=test_size={test_size} for non-overlapping tests."
147
+ )
148
+ else:
149
+ # step_size >= test_size: acceptable, no overlap
150
+ self.step_size = step_size
151
+ else:
152
+ # overlapping_tests=True: allow any step_size
153
+ if step_size is None:
154
+ # Default for overlapping: half the test size for 50% overlap
155
+ self.step_size = max(1, test_size // 2)
156
+ logger.info(
157
+ f"step_size not specified with overlapping_tests=True. "
158
+ f"Using step_size={self.step_size} (50% overlap)."
159
+ )
160
+ else:
161
+ self.step_size = step_size
162
+
163
+ def _undersample_indices(
164
+ self, train_indices: np.ndarray, y: np.ndarray, timestamps: np.ndarray
165
+ ) -> np.ndarray:
166
+ """
167
+ Undersample majority class to balance training set.
168
+
169
+ Maintains temporal order of samples.
170
+
171
+ Parameters
172
+ ----------
173
+ train_indices : ndarray
174
+ Original training indices
175
+ y : ndarray
176
+ Full label array
177
+ timestamps : ndarray
178
+ Full timestamp array
179
+
180
+ Returns
181
+ -------
182
+ ndarray
183
+ Undersampled training indices (sorted by timestamp)
184
+ """
185
+ # Get labels for training indices
186
+ train_labels = y[train_indices]
187
+
188
+ # Find unique classes and their counts
189
+ unique_classes, class_counts = np.unique(
190
+ train_labels, return_counts=True
191
+ )
192
+
193
+ if len(unique_classes) < 2:
194
+ # Only one class, no undersampling needed
195
+ return train_indices
196
+
197
+ # Find minority class count
198
+ min_count = class_counts.min()
199
+
200
+ # Undersample each class to match minority class count
201
+ undersampled_indices = []
202
+ for cls in unique_classes:
203
+ # Find indices of this class within train_indices
204
+ cls_mask = train_labels == cls
205
+ cls_train_indices = train_indices[cls_mask]
206
+
207
+ if len(cls_train_indices) > min_count:
208
+ # Randomly select min_count samples
209
+ selected = self.rng.choice(
210
+ cls_train_indices, size=min_count, replace=False
211
+ )
212
+ undersampled_indices.extend(selected)
213
+ else:
214
+ # Keep all samples from minority class
215
+ undersampled_indices.extend(cls_train_indices)
216
+
217
+ # Convert to array and sort by timestamp to maintain temporal order
218
+ undersampled_indices = np.array(undersampled_indices)
219
+ temporal_order = np.argsort(timestamps[undersampled_indices])
220
+ undersampled_indices = undersampled_indices[temporal_order]
221
+
222
+ return undersampled_indices
223
+
224
def split(
    self,
    X: np.ndarray,
    y: Optional[np.ndarray] = None,
    timestamps: Optional[np.ndarray] = None,
    groups: Optional[np.ndarray] = None,
) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    """
    Generate sliding window splits.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data
    y : array-like, shape (n_samples,), optional
        Target variable (only read when undersampling is enabled)
    timestamps : array-like, shape (n_samples,), optional
        Timestamps for temporal ordering. If None, uses sequential order
    groups : array-like, shape (n_samples,), optional
        Group labels (not used in this splitter)

    Yields
    ------
    train : ndarray
        Training set indices
    test : ndarray
        Test set indices

    Raises
    ------
    ValueError
        If timestamps is given and its length differs from the number of
        samples in X.

    Notes
    -----
    When ``self.n_splits_mode`` is set, ``window_size``, ``test_size`` and
    ``step_size`` are recalculated from the data size and overwritten on
    the instance.
    """
    n_samples = _num_samples(X)

    if timestamps is None:
        timestamps = np.arange(n_samples)
    else:
        # Accept any array-like; positional indexing is needed downstream
        timestamps = np.asarray(timestamps)
        if len(timestamps) != n_samples:
            raise ValueError(
                f"timestamps has {len(timestamps)} entries but X has "
                f"{n_samples} samples."
            )
    if y is not None:
        y = np.asarray(y)

    # Sample indices arranged in temporal order
    sorted_indices = np.argsort(timestamps)

    # Auto-calculate sizes if using n_splits mode
    if self.n_splits_mode:
        # Target layout: n_samples = window_size + n_splits * (test_size + gap),
        # with non-overlapping tests (step_size == test_size).
        if self.expanding_window:
            # Initial (minimum) training window is one (n_splits + 1)-th of
            # the data; the rest, minus the gaps, is shared by the tests.
            min_window_size = max(1, n_samples // (self._n_splits + 1))
            available_for_test = (
                n_samples - min_window_size - (self._n_splits * self.gap)
            )
            calculated_test_size = max(
                1, available_for_test // self._n_splits
            )

            self.window_size = min_window_size
            self.test_size = calculated_test_size
            self.step_size = calculated_test_size

            logger.info(
                f"n_splits={self._n_splits} with expanding_window: "
                f"Calculated window_size={self.window_size}, test_size={self.test_size}"
            )
        else:
            # Fixed window: training window gets the same size as a test
            # window for simplicity.
            available = n_samples - (self._n_splits * self.gap)
            calculated_test_size = max(
                1, available // (self._n_splits + 1)
            )

            self.window_size = calculated_test_size
            self.test_size = calculated_test_size
            self.step_size = calculated_test_size

            logger.info(
                f"n_splits={self._n_splits} with fixed window: "
                f"Calculated window_size={self.window_size}, test_size={self.test_size}"
            )

    if self.expanding_window:
        # Expanding window: training set grows to include all past data
        min_train_size = self.window_size
        total_min = min_train_size + self.gap + self.test_size

        if n_samples < total_min:
            logger.warning(
                f"Not enough samples ({n_samples}) for even one split. "
                f"Need at least {total_min} samples."
            )
            return

        # First test window starts right after the minimum window + gap
        test_start_pos = min_train_size + self.gap

        while test_start_pos + self.test_size <= n_samples:
            test_end_pos = test_start_pos + self.test_size

            # Training includes all data from start to before gap
            train_end_pos = test_start_pos - self.gap
            train_indices = sorted_indices[0:train_end_pos]
            test_indices = sorted_indices[test_start_pos:test_end_pos]

            # Apply undersampling if enabled and y is provided
            if self.undersample and y is not None:
                train_indices = self._undersample_indices(
                    train_indices, y, timestamps
                )

            assert (
                len(train_indices) > 0 and len(test_indices) > 0
            ), "Empty window"

            yield train_indices, test_indices

            # Move test window forward by step_size
            test_start_pos += self.step_size

    else:
        # Fixed sliding window: window slides through data
        total_window = self.window_size + self.gap + self.test_size

        # The range bound guarantees start + total_window <= n_samples, so
        # every generated window fits inside the data.
        for start in range(
            0, n_samples - total_window + 1, self.step_size
        ):
            # These positions are in the sorted (temporal) domain
            train_end = start + self.window_size
            test_start = train_end + self.gap
            test_end = test_start + self.test_size

            # Extract indices from the temporally sorted sequence
            train_indices = sorted_indices[start:train_end]
            test_indices = sorted_indices[test_start:test_end]

            # Apply undersampling if enabled and y is provided
            if self.undersample and y is not None:
                train_indices = self._undersample_indices(
                    train_indices, y, timestamps
                )

            assert (
                len(train_indices) > 0 and len(test_indices) > 0
            ), "Empty window"

            yield train_indices, test_indices
382
+
383
def split_with_val(
    self,
    X: np.ndarray,
    y: Optional[np.ndarray] = None,
    timestamps: Optional[np.ndarray] = None,
    groups: Optional[np.ndarray] = None,
) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Generate sliding window splits with validation set.

    The validation set comes after training but before test, maintaining
    temporal order: train < val < test.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data
    y : array-like, shape (n_samples,), optional
        Target variable (only read when undersampling is enabled)
    timestamps : array-like, shape (n_samples,), optional
        Timestamps for temporal ordering. If None, uses sequential order
    groups : array-like, shape (n_samples,), optional
        Group labels (not used in this splitter)

    Yields
    ------
    train : ndarray
        Training set indices
    val : ndarray
        Validation set indices (an empty integer array when the validation
        size derived from val_ratio is 0)
    test : ndarray
        Test set indices

    Raises
    ------
    ValueError
        If timestamps is given and its length differs from the number of
        samples in X.

    Notes
    -----
    When ``self.n_splits_mode`` is set, ``window_size``, ``test_size`` and
    ``step_size`` are recalculated from the data size and overwritten on
    the instance.
    """
    n_samples = _num_samples(X)

    if timestamps is None:
        timestamps = np.arange(n_samples)
    else:
        # Accept any array-like; positional indexing is needed downstream
        timestamps = np.asarray(timestamps)
        if len(timestamps) != n_samples:
            raise ValueError(
                f"timestamps has {len(timestamps)} entries but X has "
                f"{n_samples} samples."
            )
    if y is not None:
        y = np.asarray(y)

    # Sample indices arranged in temporal order
    sorted_indices = np.argsort(timestamps)

    # Integer-typed placeholder so the "no validation" result can still be
    # used as an index array (a float64 empty array cannot).
    no_val_indices = np.array([], dtype=sorted_indices.dtype)

    # Auto-calculate sizes if using n_splits mode
    if self.n_splits_mode:
        if self.expanding_window:
            min_window_size = max(1, n_samples // (self._n_splits + 1))
            available_for_test = (
                n_samples - min_window_size - (self._n_splits * self.gap)
            )
            calculated_test_size = max(
                1, available_for_test // self._n_splits
            )
            self.window_size = min_window_size
            self.test_size = calculated_test_size
            self.step_size = calculated_test_size
        else:
            available = n_samples - (self._n_splits * self.gap)
            calculated_test_size = max(
                1, available // (self._n_splits + 1)
            )
            self.window_size = calculated_test_size
            self.test_size = calculated_test_size
            self.step_size = calculated_test_size

    # Validation samples are carved out of the training window
    val_size = (
        int(self.window_size * self.val_ratio) if self.val_ratio > 0 else 0
    )
    actual_train_size = self.window_size - val_size

    if self.expanding_window:
        # Expanding window with validation
        min_train_size = self.window_size
        total_min = min_train_size + self.gap + self.test_size

        if n_samples < total_min:
            logger.warning(
                f"Not enough samples ({n_samples}) for even one split. "
                f"Need at least {total_min} samples."
            )
            return

        test_start_pos = min_train_size + self.gap

        while test_start_pos + self.test_size <= n_samples:
            test_end_pos = test_start_pos + self.test_size

            # Training + validation comes before gap
            train_val_end_pos = test_start_pos - self.gap

            # Validation takes the last val_size samples of that span
            if val_size > 0:
                train_end_pos = train_val_end_pos - val_size
                train_indices = sorted_indices[0:train_end_pos]
                val_indices = sorted_indices[
                    train_end_pos:train_val_end_pos
                ]
            else:
                train_indices = sorted_indices[0:train_val_end_pos]
                val_indices = no_val_indices

            test_indices = sorted_indices[test_start_pos:test_end_pos]

            # Apply undersampling if enabled and y is provided
            if self.undersample and y is not None:
                train_indices = self._undersample_indices(
                    train_indices, y, timestamps
                )
                # Also undersample validation set if it exists
                if len(val_indices) > 0:
                    val_indices = self._undersample_indices(
                        val_indices, y, timestamps
                    )

            assert (
                len(train_indices) > 0 and len(test_indices) > 0
            ), "Empty window"

            yield train_indices, val_indices, test_indices

            # Move test window forward by step_size
            test_start_pos += self.step_size

    else:
        # Fixed sliding window with validation
        total_window = self.window_size + self.gap + self.test_size

        for start in range(
            0, n_samples - total_window + 1, self.step_size
        ):
            # These positions are in the sorted (temporal) domain
            train_end = start + actual_train_size

            # Validation comes after train with optional gap
            val_start = train_end + (self.gap if val_size > 0 else 0)
            val_end = val_start + val_size

            # Test comes after validation with gap
            test_start = (
                val_end + self.gap
                if val_size > 0
                else train_end + self.gap
            )
            test_end = test_start + self.test_size

            # With a validation set the layout contains two gaps while
            # total_window accounts for one, so the last windows can
            # overrun the data and must be rejected here.
            if test_end > n_samples:
                break

            # Extract indices from the temporally sorted sequence
            train_indices = sorted_indices[start:train_end]
            val_indices = (
                sorted_indices[val_start:val_end]
                if val_size > 0
                else no_val_indices
            )
            test_indices = sorted_indices[test_start:test_end]

            # Apply undersampling if enabled and y is provided
            if self.undersample and y is not None:
                train_indices = self._undersample_indices(
                    train_indices, y, timestamps
                )
                # Also undersample validation set if it exists
                if len(val_indices) > 0:
                    val_indices = self._undersample_indices(
                        val_indices, y, timestamps
                    )

            # Train and test must both be non-empty
            assert (
                len(train_indices) > 0 and len(test_indices) > 0
            ), "Empty window"

            yield train_indices, val_indices, test_indices
560
+
561
+ def get_n_splits(self, X=None, y=None, groups=None):
562
+ """
563
+ Calculate number of splits.
564
+
565
+ Parameters
566
+ ----------
567
+ X : array-like, shape (n_samples, n_features), optional
568
+ Training data (required to determine number of splits in manual mode)
569
+ y : array-like, optional
570
+ Not used
571
+ groups : array-like, optional
572
+ Not used
573
+
574
+ Returns
575
+ -------
576
+ n_splits : int
577
+ Number of splits. Returns -1 if X is None and not in n_splits mode.
578
+ """
579
+ # If using n_splits mode, return the specified n_splits
580
+ if self.n_splits_mode:
581
+ return self._n_splits
582
+
583
+ # Manual mode: need data to calculate
584
+ if X is None:
585
+ return -1 # Can't determine without data
586
+
587
+ n_samples = _num_samples(X)
588
+ total_window = self.window_size + self.gap + self.test_size
589
+ n_windows = (n_samples - total_window) // self.step_size + 1
590
+ return max(0, n_windows)
591
+
592
+ def plot_splits(
593
+ self, X, y=None, timestamps=None, figsize=(12, 6), save_path=None
594
+ ):
595
+ """
596
+ Visualize the sliding window splits as rectangles.
597
+
598
+ Shows train (blue), validation (green), and test (red) sets.
599
+ When val_ratio=0, only shows train and test.
600
+ When undersampling is enabled, shows dropped samples in gray.
601
+
602
+ Parameters
603
+ ----------
604
+ X : array-like
605
+ Training data
606
+ y : array-like, optional
607
+ Target variable (required for undersampling visualization)
608
+ timestamps : array-like, optional
609
+ Timestamps (if None, uses sample indices)
610
+ figsize : tuple, default (12, 6)
611
+ Figure size
612
+ save_path : str, optional
613
+ Path to save the plot
614
+
615
+ Returns
616
+ -------
617
+ fig : matplotlib.figure.Figure
618
+ The created figure
619
+ """
620
+ # Use sample indices if no timestamps provided
621
+ if timestamps is None:
622
+ timestamps = np.arange(len(X))
623
+
624
+ # Get temporal ordering
625
+ time_order = np.argsort(timestamps)
626
+ sorted_timestamps = timestamps[time_order]
627
+
628
+ # Get splits WITH undersampling (if enabled)
629
+ if self.val_ratio > 0:
630
+ splits = list(self.split_with_val(X, y, timestamps))[:10]
631
+ split_type = "train-val-test"
632
+ else:
633
+ splits = list(self.split(X, y, timestamps))[:10]
634
+ split_type = "train-test"
635
+
636
+ if not splits:
637
+ raise ValueError("No splits generated")
638
+
639
+ # If undersampling is enabled, also get splits WITHOUT undersampling to show dropped samples
640
+ splits_no_undersample = None
641
+ if self.undersample and y is not None:
642
+ original_undersample = self.undersample
643
+ self.undersample = False # Temporarily disable
644
+ if self.val_ratio > 0:
645
+ splits_no_undersample = list(
646
+ self.split_with_val(X, y, timestamps)
647
+ )[:10]
648
+ else:
649
+ splits_no_undersample = list(self.split(X, y, timestamps))[:10]
650
+ self.undersample = original_undersample # Restore
651
+
652
+ # Create figure
653
+ fig, ax = stx.plt.subplots(figsize=figsize)
654
+
655
+ # Plot each fold based on temporal position
656
+ for fold, split_indices in enumerate(splits):
657
+ y_pos = fold
658
+
659
+ if len(split_indices) == 3: # train, val, test
660
+ train_idx, val_idx, test_idx = split_indices
661
+
662
+ # Find temporal positions of train indices
663
+ train_positions = []
664
+ for idx in train_idx:
665
+ temp_pos = np.where(time_order == idx)[0][
666
+ 0
667
+ ] # Find position in sorted order
668
+ train_positions.append(temp_pos)
669
+
670
+ # Plot train window based on temporal positions
671
+ if train_positions:
672
+ train_start = min(train_positions)
673
+ train_end = max(train_positions)
674
+ train_rect = patches.Rectangle(
675
+ (train_start, y_pos - 0.3),
676
+ train_end - train_start + 1,
677
+ 0.6,
678
+ linewidth=1,
679
+ edgecolor="blue",
680
+ facecolor="lightblue",
681
+ alpha=0.7,
682
+ label="Train" if fold == 0 else "",
683
+ )
684
+ ax.add_patch(train_rect)
685
+
686
+ # Find temporal positions of validation indices
687
+ if len(val_idx) > 0:
688
+ val_positions = []
689
+ for idx in val_idx:
690
+ temp_pos = np.where(time_order == idx)[0][0]
691
+ val_positions.append(temp_pos)
692
+
693
+ # Plot validation window
694
+ if val_positions:
695
+ val_start = min(val_positions)
696
+ val_end = max(val_positions)
697
+ val_rect = patches.Rectangle(
698
+ (val_start, y_pos - 0.3),
699
+ val_end - val_start + 1,
700
+ 0.6,
701
+ linewidth=1,
702
+ edgecolor="green",
703
+ facecolor="lightgreen",
704
+ alpha=0.7,
705
+ label="Validation" if fold == 0 else "",
706
+ )
707
+ ax.add_patch(val_rect)
708
+
709
+ # Find temporal positions of test indices
710
+ test_positions = []
711
+ for idx in test_idx:
712
+ temp_pos = np.where(time_order == idx)[0][
713
+ 0
714
+ ] # Find position in sorted order
715
+ test_positions.append(temp_pos)
716
+
717
+ # Plot test window based on temporal positions
718
+ if test_positions:
719
+ test_start = min(test_positions)
720
+ test_end = max(test_positions)
721
+ test_rect = patches.Rectangle(
722
+ (test_start, y_pos - 0.3),
723
+ test_end - test_start + 1,
724
+ 0.6,
725
+ linewidth=1,
726
+ edgecolor=COLORS["RGBA_NORM"]["red"],
727
+ facecolor=COLORS["RGBA_NORM"]["red"],
728
+ alpha=0.7,
729
+ label="Test" if fold == 0 else "",
730
+ )
731
+ ax.add_patch(test_rect)
732
+
733
+ else: # train, test (2-way split)
734
+ train_idx, test_idx = split_indices
735
+
736
+ # Find temporal positions of train indices
737
+ train_positions = []
738
+ for idx in train_idx:
739
+ temp_pos = np.where(time_order == idx)[0][
740
+ 0
741
+ ] # Find position in sorted order
742
+ train_positions.append(temp_pos)
743
+
744
+ # Plot train window based on temporal positions
745
+ if train_positions:
746
+ train_start = min(train_positions)
747
+ train_end = max(train_positions)
748
+ train_rect = patches.Rectangle(
749
+ (train_start, y_pos - 0.3),
750
+ train_end - train_start + 1,
751
+ 0.6,
752
+ linewidth=1,
753
+ edgecolor=COLORS["RGBA_NORM"]["light_blue"],
754
+ facecolor=COLORS["RGBA_NORM"]["light_blue"],
755
+ alpha=0.7,
756
+ label="Train" if fold == 0 else "",
757
+ )
758
+ ax.add_patch(train_rect)
759
+
760
+ # Find temporal positions of test indices
761
+ test_positions = []
762
+ for idx in test_idx:
763
+ temp_pos = np.where(time_order == idx)[0][
764
+ 0
765
+ ] # Find position in sorted order
766
+ test_positions.append(temp_pos)
767
+
768
+ # Plot test window based on temporal positions
769
+ if test_positions:
770
+ test_start = min(test_positions)
771
+ test_end = max(test_positions)
772
+ test_rect = patches.Rectangle(
773
+ (test_start, y_pos - 0.3),
774
+ test_end - test_start + 1,
775
+ 0.6,
776
+ linewidth=1,
777
+ edgecolor="red",
778
+ facecolor="lightcoral",
779
+ alpha=0.7,
780
+ label="Test" if fold == 0 else "",
781
+ )
782
+ ax.add_patch(test_rect)
783
+
784
+ # Add scatter plots of actual data points with jittering
785
+ np.random.seed(42) # For reproducible jittering
786
+ jitter_strength = 0.15 # Amount of vertical jittering
787
+
788
+ # First, plot dropped samples in gray if undersampling is enabled
789
+ if splits_no_undersample is not None:
790
+ for fold, split_indices_no_us in enumerate(splits_no_undersample):
791
+ y_pos = fold
792
+ split_indices_us = splits[fold]
793
+
794
+ if len(split_indices_no_us) == 3: # train, val, test
795
+ train_idx_no_us, val_idx_no_us, test_idx_no_us = (
796
+ split_indices_no_us
797
+ )
798
+ train_idx_us, val_idx_us, test_idx_us = split_indices_us
799
+
800
+ # Find dropped train samples
801
+ dropped_train = np.setdiff1d(train_idx_no_us, train_idx_us)
802
+ if len(dropped_train) > 0:
803
+ dropped_train_positions = [
804
+ np.where(time_order == idx)[0][0]
805
+ for idx in dropped_train
806
+ ]
807
+ dropped_train_jitter = np.random.normal(
808
+ 0, jitter_strength, len(dropped_train_positions)
809
+ )
810
+ ax.plot_scatter(
811
+ dropped_train_positions,
812
+ y_pos + dropped_train_jitter,
813
+ c="gray",
814
+ s=15,
815
+ alpha=0.3,
816
+ marker="x",
817
+ label="Dropped (train)" if fold == 0 else "",
818
+ zorder=2,
819
+ )
820
+
821
+ # Find dropped validation samples
822
+ dropped_val = np.setdiff1d(val_idx_no_us, val_idx_us)
823
+ if len(dropped_val) > 0:
824
+ dropped_val_positions = [
825
+ np.where(time_order == idx)[0][0]
826
+ for idx in dropped_val
827
+ ]
828
+ dropped_val_jitter = np.random.normal(
829
+ 0, jitter_strength, len(dropped_val_positions)
830
+ )
831
+ ax.plot_scatter(
832
+ dropped_val_positions,
833
+ y_pos + dropped_val_jitter,
834
+ c="gray",
835
+ s=15,
836
+ alpha=0.3,
837
+ marker="x",
838
+ label="Dropped (val)" if fold == 0 else "",
839
+ zorder=2,
840
+ )
841
+
842
+ else: # train, test (2-way split)
843
+ train_idx_no_us, test_idx_no_us = split_indices_no_us
844
+ train_idx_us, test_idx_us = split_indices_us
845
+
846
+ # Find dropped train samples
847
+ dropped_train = np.setdiff1d(train_idx_no_us, train_idx_us)
848
+ if len(dropped_train) > 0:
849
+ dropped_train_positions = [
850
+ np.where(time_order == idx)[0][0]
851
+ for idx in dropped_train
852
+ ]
853
+ dropped_train_jitter = np.random.normal(
854
+ 0, jitter_strength, len(dropped_train_positions)
855
+ )
856
+ ax.plot_scatter(
857
+ dropped_train_positions,
858
+ y_pos + dropped_train_jitter,
859
+ c="gray",
860
+ s=15,
861
+ alpha=0.3,
862
+ marker="x",
863
+ label="Dropped samples" if fold == 0 else "",
864
+ zorder=2,
865
+ )
866
+
867
+ # Then, plot kept samples in color
868
+ for fold, split_indices in enumerate(splits):
869
+ y_pos = fold
870
+
871
+ if len(split_indices) == 3: # train, val, test
872
+ train_idx, val_idx, test_idx = split_indices
873
+
874
+ # Find temporal positions for scatter plot
875
+ train_positions = []
876
+ for idx in train_idx:
877
+ temp_pos = np.where(time_order == idx)[0][0]
878
+ train_positions.append(temp_pos)
879
+
880
+ val_positions = []
881
+ if len(val_idx) > 0:
882
+ for idx in val_idx:
883
+ temp_pos = np.where(time_order == idx)[0][0]
884
+ val_positions.append(temp_pos)
885
+
886
+ test_positions = []
887
+ for idx in test_idx:
888
+ temp_pos = np.where(time_order == idx)[0][0]
889
+ test_positions.append(temp_pos)
890
+
891
+ # Add jittered scatter plots for 3-way split
892
+ if train_positions:
893
+ train_jitter = np.random.normal(
894
+ 0, jitter_strength, len(train_positions)
895
+ )
896
+ # Color by class if y is provided
897
+ if y is not None:
898
+ train_colors = [
899
+ stx.plt.color.PARAMS["RGBA_NORM"]["blue"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["light_blue"]
900
+ for idx in train_idx
901
+ ]
902
+ ax.plot_scatter(
903
+ train_positions,
904
+ y_pos + train_jitter,
905
+ c=train_colors,
906
+ s=20,
907
+ alpha=0.7,
908
+ marker="o",
909
+ label="Train (class 0)" if fold == 0 else "",
910
+ zorder=3,
911
+ )
912
+ else:
913
+ ax.plot_scatter(
914
+ train_positions,
915
+ y_pos + train_jitter,
916
+ c="darkblue",
917
+ s=20,
918
+ alpha=0.7,
919
+ marker="o",
920
+ label="Train points" if fold == 0 else "",
921
+ zorder=3,
922
+ )
923
+
924
+ if val_positions:
925
+ val_jitter = np.random.normal(
926
+ 0, jitter_strength, len(val_positions)
927
+ )
928
+ # Color by class if y is provided
929
+ if y is not None:
930
+ val_colors = [
931
+ stx.plt.color.PARAMS["RGBA_NORM"]["yellow"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["orange"]
932
+ for idx in val_idx
933
+ ]
934
+ ax.plot_scatter(
935
+ val_positions,
936
+ y_pos + val_jitter,
937
+ c=val_colors,
938
+ s=20,
939
+ alpha=0.7,
940
+ marker="^",
941
+ label="Val (class 0)" if fold == 0 else "",
942
+ zorder=3,
943
+ )
944
+ else:
945
+ ax.plot_scatter(
946
+ val_positions,
947
+ y_pos + val_jitter,
948
+ c="darkgreen",
949
+ s=20,
950
+ alpha=0.7,
951
+ marker="^",
952
+ label="Val points" if fold == 0 else "",
953
+ zorder=3,
954
+ )
955
+
956
+ if test_positions:
957
+ test_jitter = np.random.normal(
958
+ 0, jitter_strength, len(test_positions)
959
+ )
960
+ # Color by class if y is provided
961
+ if y is not None:
962
+ test_colors = [
963
+ stx.plt.color.PARAMS["RGBA_NORM"]["red"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["brown"]
964
+ for idx in test_idx
965
+ ]
966
+ ax.plot_scatter(
967
+ test_positions,
968
+ y_pos + test_jitter,
969
+ c=test_colors,
970
+ s=20,
971
+ alpha=0.7,
972
+ marker="s",
973
+ label="Test (class 0)" if fold == 0 else "",
974
+ zorder=3,
975
+ )
976
+ else:
977
+ ax.plot_scatter(
978
+ test_positions,
979
+ y_pos + test_jitter,
980
+ c="darkred",
981
+ s=20,
982
+ alpha=0.7,
983
+ marker="s",
984
+ label="Test points" if fold == 0 else "",
985
+ zorder=3,
986
+ )
987
+
988
+ else: # train, test (2-way split)
989
+ train_idx, test_idx = split_indices
990
+
991
+ # Get actual timestamps for train and test indices
992
+ train_times = (
993
+ timestamps[train_idx]
994
+ if timestamps is not None
995
+ else train_idx
996
+ )
997
+ test_times = (
998
+ timestamps[test_idx]
999
+ if timestamps is not None
1000
+ else test_idx
1001
+ )
1002
+
1003
+ # Find temporal positions for scatter plot
1004
+ train_positions = []
1005
+ for idx in train_idx:
1006
+ temp_pos = np.where(time_order == idx)[0][0]
1007
+ train_positions.append(temp_pos)
1008
+
1009
+ test_positions = []
1010
+ for idx in test_idx:
1011
+ temp_pos = np.where(time_order == idx)[0][0]
1012
+ test_positions.append(temp_pos)
1013
+
1014
+ # Add jittered scatter plots for 2-way split
1015
+ if train_positions:
1016
+ train_jitter = np.random.normal(
1017
+ 0, jitter_strength, len(train_positions)
1018
+ )
1019
+ # Color by class if y is provided
1020
+ if y is not None:
1021
+ train_colors = [
1022
+ stx.plt.color.PARAMS["RGBA_NORM"]["blue"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["light_blue"]
1023
+ for idx in train_idx
1024
+ ]
1025
+ ax.plot_scatter(
1026
+ train_positions,
1027
+ y_pos + train_jitter,
1028
+ c=train_colors,
1029
+ s=20,
1030
+ alpha=0.7,
1031
+ marker="o",
1032
+ label="Train (class 0)" if fold == 0 else "",
1033
+ zorder=3,
1034
+ )
1035
+ else:
1036
+ ax.plot_scatter(
1037
+ train_positions,
1038
+ y_pos + train_jitter,
1039
+ c="darkblue",
1040
+ s=20,
1041
+ alpha=0.7,
1042
+ marker="o",
1043
+ label="Train points" if fold == 0 else "",
1044
+ zorder=3,
1045
+ )
1046
+
1047
+ if test_positions:
1048
+ test_jitter = np.random.normal(
1049
+ 0, jitter_strength, len(test_positions)
1050
+ )
1051
+ # Color by class if y is provided
1052
+ if y is not None:
1053
+ test_colors = [
1054
+ stx.plt.color.PARAMS["RGBA_NORM"]["red"] if y[idx] == 0 else stx.plt.color.PARAMS["RGBA_NORM"]["brown"]
1055
+ for idx in test_idx
1056
+ ]
1057
+ ax.plot_scatter(
1058
+ test_positions,
1059
+ y_pos + test_jitter,
1060
+ c=test_colors,
1061
+ s=20,
1062
+ alpha=0.7,
1063
+ marker="s",
1064
+ label="Test (class 0)" if fold == 0 else "",
1065
+ zorder=3,
1066
+ )
1067
+ else:
1068
+ ax.plot_scatter(
1069
+ test_positions,
1070
+ y_pos + test_jitter,
1071
+ c="darkred",
1072
+ s=20,
1073
+ alpha=0.7,
1074
+ marker="s",
1075
+ label="Test points" if fold == 0 else "",
1076
+ zorder=3,
1077
+ )
1078
+
1079
+ # Format plot
1080
+ ax.set_ylim(-0.5, len(splits) - 0.5)
1081
+ ax.set_xlim(0, len(X))
1082
+ ax.set_xlabel("Temporal Position (sorted by timestamp)")
1083
+ ax.set_ylabel("Fold")
1084
+ gap_text = f", Gap: {self.gap}" if self.gap > 0 else ""
1085
+ val_text = (
1086
+ f", Val ratio: {self.val_ratio:.1%}" if self.val_ratio > 0 else ""
1087
+ )
1088
+ ax.set_title(
1089
+ f"Sliding Window Split Visualization ({split_type})\\n"
1090
+ f"Window: {self.window_size}, Step: {self.step_size}, Test: {self.test_size}{gap_text}{val_text}\\n"
1091
+ f"Rectangles show windows, dots show actual data points"
1092
+ )
1093
+
1094
+ # Set y-ticks
1095
+ ax.set_yticks(range(len(splits)))
1096
+ ax.set_yticklabels([f"Fold {i}" for i in range(len(splits))])
1097
+
1098
+ # Add enhanced legend with class and sample information
1099
+ if y is not None:
1100
+ # Count samples per class in total dataset
1101
+ unique_classes, class_counts = np.unique(y, return_counts=True)
1102
+ total_class_info = ", ".join(
1103
+ [
1104
+ f"Class {cls}: n={count}"
1105
+ for cls, count in zip(unique_classes, class_counts)
1106
+ ]
1107
+ )
1108
+
1109
+ # Count samples in first fold to show per-fold distribution
1110
+ first_split = splits[0]
1111
+ if len(first_split) == 3: # train, val, test
1112
+ train_idx, val_idx, test_idx = first_split
1113
+ fold_info = f"Fold 0: Train n={len(train_idx)}, Val n={len(val_idx)}, Test n={len(test_idx)}"
1114
+ else: # train, test
1115
+ train_idx, test_idx = first_split
1116
+ fold_info = (
1117
+ f"Fold 0: Train n={len(train_idx)}, Test n={len(test_idx)}"
1118
+ )
1119
+
1120
+ # Add legend with class information
1121
+ handles, labels = ax.get_legend_handles_labels()
1122
+ # Add title to legend showing class distribution
1123
+ legend_title = f"Total: {total_class_info}\\n{fold_info}"
1124
+ ax.legend(handles, labels, loc="upper right", title=legend_title)
1125
+ else:
1126
+ ax.legend(loc="upper right")
1127
+
1128
+ plt.tight_layout()
1129
+
1130
+ if save_path:
1131
+ fig.savefig(save_path, dpi=150, bbox_inches="tight")
1132
+
1133
+ return fig
1134
+
1135
+
1136
+ """Functions & Classes"""
1137
def main(args) -> int:
    """Run every TimeSeriesSlidingWindowSplit demo and log a comparison.

    Synthetic data is generated from ``args.n_samples`` (NumPy seeded with
    42), the eight demos run in a fixed order, each demo saves one figure via
    ``stx.io.save``, and a summary of the fold counts is logged at the end.
    Each demo keeps only the first five (demos 1-3) or three (demos 4-8)
    folds, so the summary reports those capped counts.

    Args:
        args: Parsed command line options; ``n_samples``, ``window_size``,
            ``test_size`` and ``gap`` are read.

    Returns:
        int: Exit status, always 0.
    """
    rule = "=" * 70

    def announce(title, best_for):
        """Log the framed demo title followed by its 'Best for' line."""
        for text in (rule, title, rule, best_for):
            logger.info(text)

    def build_splitter(**options):
        """Create a splitter from the CLI sizes plus demo-specific options."""
        return TimeSeriesSlidingWindowSplit(
            window_size=args.window_size,
            test_size=args.test_size,
            gap=args.gap,
            **options,
        )

    def export_figure(splitter, X, labels, timestamps, out_path):
        """Plot the splits, save the figure, then log a blank separator."""
        figure = splitter.plot_splits(X, labels, timestamps)
        stx.io.save(figure, out_path, symlink_from_cwd=True)
        logger.info("")

    def label_counts(labels):
        """Return a ``{label: count}`` dict for an array of labels."""
        values, tallies = np.unique(labels, return_counts=True)
        return dict(zip(values, tallies))

    def demo_01_fixed_window_non_overlapping_tests(X, y, timestamps):
        """Demo 1: fixed-size window, non-overlapping tests (the default)."""
        announce(
            "DEMO 1: Fixed Window + Non-overlapping Tests (DEFAULT)",
            "Best for: Testing model on consistent recent history",
        )
        splitter = build_splitter(
            overlapping_tests=False,
            expanding_window=False,
        )
        folds = list(splitter.split(X, y, timestamps))[:5]
        logger.info(f"Generated {len(folds)} splits")
        for k, (train_idx, test_idx) in enumerate(folds):
            logger.info(
                f" Fold {k}: Train={len(train_idx)} (fixed), Test={len(test_idx)}"
            )
        export_figure(
            splitter, X, y, timestamps, "./01_sliding_window_fixed.jpg"
        )
        return folds

    def demo_02_expanding_window_non_overlapping_tests(X, y, timestamps):
        """Demo 2: expanding training window, non-overlapping tests."""
        announce(
            "DEMO 2: Expanding Window + Non-overlapping Tests",
            "Best for: Using all available past data (like sklearn TimeSeriesSplit)",
        )
        splitter = build_splitter(
            overlapping_tests=False,
            expanding_window=True,
        )
        folds = list(splitter.split(X, y, timestamps))[:5]
        logger.info(f"Generated {len(folds)} splits")
        for k, (train_idx, test_idx) in enumerate(folds):
            logger.info(
                f" Fold {k}: Train={len(train_idx)} (growing!), Test={len(test_idx)}"
            )
        export_figure(
            splitter, X, y, timestamps, "./02_sliding_window_expanding.jpg"
        )
        return folds

    def demo_03_fixed_window_overlapping_tests(X, y, timestamps):
        """Demo 3: fixed-size window with overlapping test sets."""
        announce(
            "DEMO 3: Fixed Window + Overlapping Tests",
            "Best for: Maximum evaluation points (like K-fold for training)",
        )
        # step_size is deliberately not passed; the splitter applies its own
        # default (the original note states test_size // 2, i.e. 50% overlap).
        splitter = build_splitter(
            overlapping_tests=True,
            expanding_window=False,
        )
        folds = list(splitter.split(X, y, timestamps))[:5]
        logger.info(f"Generated {len(folds)} splits")
        for k, (train_idx, test_idx) in enumerate(folds):
            logger.info(
                f" Fold {k}: Train={len(train_idx)}, Test={len(test_idx)}"
            )
        export_figure(
            splitter, X, y, timestamps, "./03_sliding_window_overlapping.jpg"
        )
        return folds

    def demo_04_undersample_imbalanced_data(X, y_imbalanced, timestamps):
        """Demo 4: compare training folds without and with undersampling.

        Returns the folds produced with undersampling enabled.
        """
        announce(
            "DEMO 4: Undersampling for Imbalanced Data",
            "Best for: Handling class imbalance in time series",
        )
        logger.info(f"Class distribution: {label_counts(y_imbalanced)}")
        logger.info("")

        # Baseline: same sizes, undersampling switched off.
        plain_splitter = build_splitter(undersample=False)
        plain_folds = list(
            plain_splitter.split(X, y_imbalanced, timestamps)
        )[:3]
        logger.info(f"WITHOUT undersampling: {len(plain_folds)} splits")
        for k, (train_idx, _) in enumerate(plain_folds):
            logger.info(
                f" Fold {k}: Train size={len(train_idx)}, "
                f"Class dist={label_counts(y_imbalanced[train_idx])}"
            )
        logger.info("")

        # Same configuration with undersampling enabled and a fixed seed.
        balanced_splitter = build_splitter(undersample=True, random_state=42)
        balanced_folds = list(
            balanced_splitter.split(X, y_imbalanced, timestamps)
        )[:3]
        logger.info(f"WITH undersampling: {len(balanced_folds)} splits")
        for k, (train_idx, _) in enumerate(balanced_folds):
            logger.info(
                f" Fold {k}: Train size={len(train_idx)} (balanced!), "
                f"Class dist={label_counts(y_imbalanced[train_idx])}"
            )

        # Only the undersampled configuration is visualised.
        export_figure(
            balanced_splitter,
            X,
            y_imbalanced,
            timestamps,
            "./04_sliding_window_undersample.jpg",
        )
        return balanced_folds

    def demo_05_validation_dataset(X, y, timestamps):
        """Demo 5: train/validation/test folds with a fixed-size window."""
        announce(
            "DEMO 5: Validation Dataset (Train-Val-Test Splits)",
            "Best for: Model selection and hyperparameter tuning",
        )
        splitter = build_splitter(
            val_ratio=0.2,
            overlapping_tests=False,
            expanding_window=False,
        )
        folds = list(splitter.split_with_val(X, y, timestamps))[:3]
        logger.info(f"Generated {len(folds)} splits")
        for k, (train_idx, val_idx, test_idx) in enumerate(folds):
            logger.info(
                f" Fold {k}: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}"
            )
        export_figure(
            splitter, X, y, timestamps, "./05_sliding_window_validation.jpg"
        )
        return folds

    def demo_06_expanding_with_validation(X, y, timestamps):
        """Demo 6: train/validation/test folds with an expanding window."""
        announce(
            "DEMO 6: Expanding Window + Validation Dataset",
            "Best for: Using all historical data with model selection",
        )
        splitter = build_splitter(
            val_ratio=0.2,
            overlapping_tests=False,
            expanding_window=True,
        )
        folds = list(splitter.split_with_val(X, y, timestamps))[:3]
        logger.info(f"Generated {len(folds)} splits")
        for k, (train_idx, val_idx, test_idx) in enumerate(folds):
            logger.info(
                f" Fold {k}: Train={len(train_idx)} (growing!), Val={len(val_idx)}, Test={len(test_idx)}"
            )
        export_figure(
            splitter,
            X,
            y,
            timestamps,
            "./06_sliding_window_expanding_validation.jpg",
        )
        return folds

    def demo_07_undersample_with_validation(X, y_imbalanced, timestamps):
        """Demo 7: undersampling combined with a validation split."""
        announce(
            "DEMO 7: Undersampling + Validation Dataset",
            "Best for: Imbalanced data with hyperparameter tuning",
        )
        splitter = build_splitter(
            val_ratio=0.2,
            undersample=True,
            random_state=42,
        )
        folds = list(splitter.split_with_val(X, y_imbalanced, timestamps))[:3]
        logger.info(f"Generated {len(folds)} splits")
        for k, (train_idx, val_idx, test_idx) in enumerate(folds):
            logger.info(
                f" Fold {k}: Train={len(train_idx)} (balanced!), Val={len(val_idx)}, Test={len(test_idx)}, "
                f"Class dist={label_counts(y_imbalanced[train_idx])}"
            )
        export_figure(
            splitter,
            X,
            y_imbalanced,
            timestamps,
            "./07_sliding_window_undersample_validation.jpg",
        )
        return folds

    def demo_08_all_options_combined(X, y_imbalanced, timestamps):
        """Demo 8: expanding window, undersampling and validation together."""
        announce(
            "DEMO 8: Expanding + Undersampling + Validation (ALL OPTIONS)",
            "Best for: Comprehensive time series CV with all features",
        )
        splitter = build_splitter(
            val_ratio=0.2,
            overlapping_tests=False,
            expanding_window=True,
            undersample=True,
            random_state=42,
        )
        folds = list(splitter.split_with_val(X, y_imbalanced, timestamps))[:3]
        logger.info(f"Generated {len(folds)} splits")
        for k, (train_idx, val_idx, test_idx) in enumerate(folds):
            logger.info(
                f" Fold {k}: Train={len(train_idx)} (growing & balanced!), Val={len(val_idx)}, Test={len(test_idx)}, "
                f"Class dist={label_counts(y_imbalanced[train_idx])}"
            )
        export_figure(
            splitter,
            X,
            y_imbalanced,
            timestamps,
            "./08_sliding_window_all_options.jpg",
        )
        return folds

    def print_summary(
        splits_fixed,
        splits_expanding,
        splits_overlap,
        splits_undersample=None,
        splits_validation=None,
        splits_expanding_val=None,
        splits_undersample_val=None,
        splits_all_options=None,
    ):
        """Log one line per demo result plus the key insights.

        Lines for the optional results are emitted only when the
        corresponding argument is not ``None``.
        """

        def summary_lines():
            # A generator keeps evaluation lazy: each message is built right
            # before it is logged, exactly as with sequential log calls.
            yield rule
            yield "SUMMARY COMPARISON"
            yield rule
            yield f"01. Fixed window (non-overlap): {len(splits_fixed)} folds, train size constant"
            yield f"02. Expanding window (non-overlap): {len(splits_expanding)} folds, train size grows"
            yield f"03. Fixed window (overlapping): {len(splits_overlap)} folds, more eval points"
            if splits_undersample is not None:
                yield f"04. With undersampling: {len(splits_undersample)} folds, balanced classes"
            if splits_validation is not None:
                yield f"05. With validation set: {len(splits_validation)} folds, train-val-test"
            if splits_expanding_val is not None:
                yield f"06. Expanding + validation: {len(splits_expanding_val)} folds, growing train with val"
            if splits_undersample_val is not None:
                yield f"07. Undersample + validation: {len(splits_undersample_val)} folds, balanced with val"
            if splits_all_options is not None:
                yield f"08. All options combined: {len(splits_all_options)} folds, expanding + balanced + val"
            yield ""
            yield "Key Insights:"
            yield " - Non-overlapping tests (default): Each sample tested exactly once"
            yield " - Expanding window: Maximizes training data, like sklearn TimeSeriesSplit"
            yield " - Overlapping tests: More evaluation points, like K-fold training reuse"
            if splits_undersample is not None:
                yield " - Undersampling: Balances imbalanced classes in training sets"
            if splits_validation is not None:
                yield " - Validation set: Enables hyperparameter tuning with temporal order"
            if splits_all_options is not None:
                yield " - Combined options: Maximum flexibility for complex time series CV"
            yield rule

        for line in summary_lines():
            logger.info(line)

    # ---- Main execution -------------------------------------------------
    logger.info(rule)
    logger.info("Demonstrating TimeSeriesSlidingWindowSplit with New Options")
    logger.info(rule)

    # Synthetic data. The order of the random draws is significant for
    # reproducibility under the fixed seed, so it must not be rearranged.
    np.random.seed(42)
    n_samples = args.n_samples
    X = np.random.randn(n_samples, 5)
    y = np.random.randint(0, 2, n_samples)  # roughly balanced labels
    timestamps = np.arange(n_samples) + np.random.normal(0, 0.1, n_samples)

    # Imbalanced labels: 20% of the samples are flipped to class 1.
    y_imbalanced = np.zeros(n_samples, dtype=int)
    minority_size = int(n_samples * 0.2)
    minority_rows = np.random.choice(
        n_samples, size=minority_size, replace=False
    )
    y_imbalanced[minority_rows] = 1

    logger.info(
        f"Generated test data: {n_samples} samples, {X.shape[1]} features"
    )
    logger.info("")

    # Each demo is paired with the label vector it should receive.
    schedule = (
        (demo_01_fixed_window_non_overlapping_tests, y),
        (demo_02_expanding_window_non_overlapping_tests, y),
        (demo_03_fixed_window_overlapping_tests, y),
        (demo_04_undersample_imbalanced_data, y_imbalanced),
        (demo_05_validation_dataset, y),
        (demo_06_expanding_with_validation, y),
        (demo_07_undersample_with_validation, y_imbalanced),
        (demo_08_all_options_combined, y_imbalanced),
    )
    outcomes = [demo(X, labels, timestamps) for demo, labels in schedule]

    # Results are passed positionally in demo order (01 .. 08).
    print_summary(*outcomes)

    return 0
1615
+
1616
+
1617
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the demo's command line options.

    Args:
        argv: Optional sequence of argument strings to parse. ``None`` (the
            default) makes argparse read the process arguments, which is the
            behaviour existing callers rely on. Passing a list allows the
            parser to be driven programmatically, e.g. from tests.

    Returns:
        argparse.Namespace: Namespace with the integer attributes
        ``n_samples``, ``window_size``, ``test_size`` and ``gap``.
    """
    parser = argparse.ArgumentParser(
        description="Demonstrate TimeSeriesSlidingWindowSplit with overlapping_tests and expanding_window options"
    )

    # (flag, default, help summary) - every option is an integer.
    integer_options = (
        ("--n-samples", 200, "Number of samples to generate"),
        ("--window-size", 50, "Size of training window"),
        ("--test-size", 20, "Size of test window"),
        ("--gap", 5, "Gap between train and test"),
    )
    for flag, default, summary in integer_options:
        parser.add_argument(
            flag,
            type=int,
            default=default,
            # argparse expands %(default)s when rendering the help text.
            help=f"{summary} (default: %(default)s)",
        )

    return parser.parse_args(argv)
1648
+
1649
+
1650
def run_main() -> None:
    """Start a scitex session, run ``main`` and close the session.

    The values returned by ``stx.session.start`` are bound to the module
    globals ``CONFIG``, ``plt``, ``CC`` and ``rng`` and to ``sys.stdout`` /
    ``sys.stderr``. The status returned by ``main`` is forwarded to
    ``stx.session.close``.
    """
    global CONFIG, CC, sys, plt, rng

    import sys

    import matplotlib.pyplot as plt
    import scitex as stx

    args = parse_args()

    session = stx.session.start(
        sys,
        plt,
        args=args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )
    # Rebind the globals and the standard streams from the session tuple.
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = session

    status = main(args)

    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=status,
    )


if __name__ == "__main__":
    run_main()
1684
+
1685
+ # EOF