scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704):
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-09-21 20:45:00 (ywatanabe)"
4
+ # File: _TimeSeriesStrategy.py
5
+
6
+ """
7
+ Time series cross-validation strategy enumeration.
8
+
9
+ Defines available strategies for time series CV.
10
+ """
11
+
12
+ from enum import Enum
13
+
14
+
15
class TimeSeriesStrategy(Enum):
    """
    Available time series CV strategies.

    Attributes
    ----------
    STRATIFIED : str
        Single time series with class balance preservation
    BLOCKING : str
        Multiple independent time series (e.g., different patients)
    SLIDING : str
        Sliding window approach with fixed-size windows
    EXPANDING : str
        Expanding window where training set grows over time
    FIXED : str
        Fixed train/test split at specific time point
    """

    STRATIFIED = "stratified"  # Single time series with class balance
    BLOCKING = "blocking"  # Multiple time series (e.g., patients)
    SLIDING = "sliding"  # Sliding window approach
    EXPANDING = "expanding"  # Expanding window (train grows)
    FIXED = "fixed"  # Fixed train/test split

    @classmethod
    def from_string(cls, value: str) -> 'TimeSeriesStrategy':
        """
        Create strategy from string value.

        The lookup is case-insensitive and ignores surrounding whitespace.

        Parameters
        ----------
        value : str
            String representation of strategy

        Returns
        -------
        TimeSeriesStrategy
            Corresponding enum value

        Raises
        ------
        ValueError
            If value doesn't match any strategy
        """
        normalized = value.strip().lower()
        try:
            # Enum's by-value lookup replaces the manual scan over members.
            return cls(normalized)
        except ValueError:
            # Re-raise with the list of valid options; the built-in message
            # only echoes the bad value.
            raise ValueError(
                f"Unknown strategy: {value}. "
                f"Valid options are: {[s.value for s in cls]}"
            ) from None

    def get_description(self) -> str:
        """
        Get human-readable description of the strategy.

        Returns
        -------
        str
            Description of the strategy
        """
        # Look members up on the class, not on ``self``: reaching one member
        # through another (``self.STRATIFIED``) triggers a DeprecationWarning
        # on Python 3.11.
        strategy = type(self)
        descriptions = {
            strategy.STRATIFIED: "Maintains class balance while respecting time order",
            strategy.BLOCKING: "Handles multiple independent time series",
            strategy.SLIDING: "Uses fixed-size sliding windows through time",
            strategy.EXPANDING: "Training set expands while test moves forward",
            strategy.FIXED: "Single fixed split at specific time point",
        }
        return descriptions.get(self, "Unknown strategy")
@@ -0,0 +1,610 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-09-22 16:50:00 (ywatanabe)"
4
+ # File: _TimeSeriesStratifiedSplit.py
5
+
6
+ __FILE__ = "_TimeSeriesStratifiedSplit.py"
7
+
8
+ """
9
+ Functionalities:
10
+ - Implements time series cross-validation with stratification support
11
+ - Ensures chronological order (test data always after training data)
12
+ - Supports optional validation set between train and test
13
+ - Maintains temporal gaps to prevent data leakage
14
+ - Provides visualization with scatter plots for verification
15
+ - Validates temporal integrity in all splits
16
+
17
+ Dependencies:
18
+ - packages:
19
+ - numpy
20
+ - sklearn
21
+ - matplotlib
22
+
23
+ IO:
24
+ - input-files:
25
+ - None (generates synthetic data for demonstration)
26
+ - output-files:
27
+ - ./stratified_splits_demo.png (visualization)
28
+ """
29
+
30
+ """Imports"""
31
+ import os
32
+ import sys
33
+ import argparse
34
+ import numpy as np
35
+ from typing import Iterator, Optional, Tuple
36
+ from sklearn.model_selection import BaseCrossValidator
37
+ from sklearn.utils.validation import _num_samples
38
+ import matplotlib.pyplot as plt
39
+ import matplotlib.patches as patches
40
+ import scitex as stx
41
+ from scitex import logging
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
+ class TimeSeriesStratifiedSplit(BaseCrossValidator):
47
+ """
48
+ Time series cross-validation with stratification support.
49
+
50
+ This splitter ensures:
51
+ 1. Test data is always chronologically after training data
52
+ 2. Optional validation set between train and test
53
+ 3. Class balance preservation in splits
54
+ 4. Gap period between train and test to avoid leakage
55
+
56
+ Parameters
57
+ ----------
58
+ n_splits : int
59
+ Number of splits (folds)
60
+ test_ratio : float
61
+ Proportion of data for test set (default: 0.2)
62
+ val_ratio : float
63
+ Proportion of data for validation set (default: 0.1)
64
+ gap : int
65
+ Number of samples to exclude between train and test (default: 0)
66
+ stratify : bool
67
+ Whether to maintain class proportions (default: True)
68
+ random_state : int, optional
69
+ Random seed for reproducibility (default: None)
70
+
71
+ Examples
72
+ --------
73
+ >>> from scitex.ml.classification import TimeSeriesStratifiedSplit
74
+ >>> import numpy as np
75
+ >>>
76
+ >>> X = np.random.randn(100, 10)
77
+ >>> y = np.random.randint(0, 2, 100)
78
+ >>> timestamps = np.arange(100)
79
+ >>>
80
+ >>> tscv = TimeSeriesStratifiedSplit(n_splits=3)
81
+ >>> for train_idx, test_idx in tscv.split(X, y, timestamps):
82
+ ... print(f"Train: {len(train_idx)}, Test: {len(test_idx)}")
83
+ """
84
+
85
+ def __init__(
86
+ self,
87
+ n_splits: int = 5,
88
+ test_ratio: float = 0.2,
89
+ val_ratio: float = 0.1,
90
+ gap: int = 0,
91
+ stratify: bool = True,
92
+ random_state: Optional[int] = None,
93
+ ):
94
+ self.n_splits = n_splits
95
+ self.test_ratio = test_ratio
96
+ self.val_ratio = val_ratio
97
+ self.gap = gap
98
+ self.stratify = stratify
99
+ self.random_state = random_state
100
+ self.rng = np.random.default_rng(random_state)
101
+
102
+ def split(
103
+ self,
104
+ X: np.ndarray,
105
+ y: Optional[np.ndarray] = None,
106
+ timestamps: Optional[np.ndarray] = None,
107
+ groups: Optional[np.ndarray] = None,
108
+ ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
109
+ """
110
+ Generate indices to split data into training and test sets.
111
+
112
+ Parameters
113
+ ----------
114
+ X : array-like, shape (n_samples, n_features)
115
+ Training data
116
+ y : array-like, shape (n_samples,)
117
+ Target variable
118
+ timestamps : array-like, shape (n_samples,)
119
+ Timestamps for temporal ordering (required)
120
+ groups : array-like, shape (n_samples,), optional
121
+ Group labels for grouped CV
122
+
123
+ Yields
124
+ ------
125
+ train : ndarray
126
+ Training set indices
127
+ test : ndarray
128
+ Test set indices
129
+ """
130
+ if timestamps is None:
131
+ raise ValueError("timestamps must be provided for time series split")
132
+
133
+ n_samples = _num_samples(X)
134
+ indices = np.arange(n_samples)
135
+
136
+ # Sort by timestamp
137
+ time_order = np.argsort(timestamps)
138
+ sorted_indices = indices[time_order]
139
+ sorted_y = y[time_order] if y is not None else None
140
+
141
+ # Calculate split sizes
142
+ test_size = int(n_samples * self.test_ratio)
143
+ val_size = int(n_samples * self.val_ratio) if self.val_ratio > 0 else 0
144
+
145
+ # Generate splits with expanding training window
146
+ for i in range(self.n_splits):
147
+ # Expanding window approach
148
+ train_end = n_samples - test_size - self.gap
149
+ train_end = train_end - (self.n_splits - i - 1) * (test_size // self.n_splits)
150
+ train_end = max(test_size, train_end) # Ensure min training size
151
+
152
+ # Apply gap and start test set immediately after gap
153
+ test_start = train_end + self.gap
154
+ test_end = min(test_start + test_size, n_samples)
155
+
156
+ # Get indices
157
+ train_indices = sorted_indices[:train_end]
158
+ test_indices = sorted_indices[test_start:test_end]
159
+
160
+ # For time series, temporal integrity is prioritized over stratification
161
+ # Chronological order must be preserved to prevent data leakage
162
+ # Class imbalance should be handled through other methods or at dataset level
163
+
164
+ yield train_indices, test_indices
165
+
166
+ def split_with_val(
167
+ self,
168
+ X: np.ndarray,
169
+ y: Optional[np.ndarray] = None,
170
+ timestamps: Optional[np.ndarray] = None,
171
+ groups: Optional[np.ndarray] = None,
172
+ ) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
173
+ """
174
+ Generate indices with separate validation set.
175
+
176
+ Yields
177
+ ------
178
+ train : ndarray
179
+ Training set indices
180
+ val : ndarray
181
+ Validation set indices
182
+ test : ndarray
183
+ Test set indices
184
+ """
185
+ if timestamps is None:
186
+ raise ValueError("timestamps must be provided for time series split")
187
+
188
+ n_samples = _num_samples(X)
189
+ indices = np.arange(n_samples)
190
+
191
+ # Sort by timestamp
192
+ time_order = np.argsort(timestamps)
193
+ sorted_indices = indices[time_order]
194
+ sorted_y = y[time_order] if y is not None else None
195
+
196
+ # Calculate split sizes
197
+ test_size = int(n_samples * self.test_ratio)
198
+ val_size = int(n_samples * self.val_ratio) if self.val_ratio > 0 else 0
199
+
200
+ # Generate splits with strict temporal order
201
+ for i in range(self.n_splits):
202
+ # Calculate split points in temporal order (sorted domain)
203
+ # Work backwards from the end to ensure proper spacing
204
+ test_start_pos = n_samples - test_size
205
+ test_start_pos = test_start_pos - i * (test_size // self.n_splits) # Earlier for each fold
206
+ test_end_pos = min(test_start_pos + test_size, n_samples)
207
+
208
+ # Validation comes before test with gap
209
+ val_end_pos = test_start_pos - self.gap
210
+ val_start_pos = max(0, val_end_pos - val_size)
211
+
212
+ # Training comes before validation with gap
213
+ train_end_pos = val_start_pos - self.gap
214
+ train_start_pos = 0 # Always start from beginning (expanding window)
215
+
216
+ # Ensure all positions are valid
217
+ if train_end_pos <= train_start_pos or val_start_pos >= val_end_pos or test_start_pos >= test_end_pos:
218
+ continue
219
+
220
+ # Extract indices from temporally sorted sequence
221
+ train_indices = sorted_indices[train_start_pos:train_end_pos]
222
+ val_indices = sorted_indices[val_start_pos:val_end_pos]
223
+ test_indices = sorted_indices[test_start_pos:test_end_pos]
224
+
225
+ # For split_with_val, we prioritize temporal integrity over stratification
226
+ # to ensure no overlapping between train, validation, and test sets
227
+ # Class imbalance should be handled through other methods for 3-way splits
228
+
229
+ yield train_indices, val_indices, test_indices
230
+
231
+ def _stratify_indices_temporal(
232
+ self, indices: np.ndarray, y: np.ndarray, target_size: int
233
+ ) -> np.ndarray:
234
+ """Apply stratification while preserving temporal order for time series.
235
+
236
+ This method maintains chronological order as the top priority while
237
+ attempting to balance class representation within the temporal window.
238
+ """
239
+ # If target_size >= current size, return as-is
240
+ if target_size >= len(indices):
241
+ return indices
242
+
243
+ # Get the labels for these indices in their current temporal order
244
+ current_labels = y[indices]
245
+ unique_classes = np.unique(current_labels)
246
+
247
+ # Calculate desired samples per class based on current distribution
248
+ class_counts = {}
249
+ for cls in unique_classes:
250
+ class_counts[cls] = np.sum(current_labels == cls)
251
+
252
+ total_current = len(indices)
253
+
254
+ # Calculate target samples per class, proportional to current distribution
255
+ target_per_class = {}
256
+ remaining_target = target_size
257
+
258
+ for cls in unique_classes:
259
+ proportion = class_counts[cls] / total_current
260
+ target_count = max(1, int(target_size * proportion))
261
+ target_per_class[cls] = min(target_count, class_counts[cls])
262
+ remaining_target -= target_per_class[cls]
263
+
264
+ # Adjust if we're under/over the target
265
+ if remaining_target > 0:
266
+ # Distribute remaining samples to classes with most samples
267
+ sorted_classes = sorted(unique_classes,
268
+ key=lambda x: class_counts[x], reverse=True)
269
+ for cls in sorted_classes:
270
+ if remaining_target <= 0:
271
+ break
272
+ if target_per_class[cls] < class_counts[cls]:
273
+ target_per_class[cls] += 1
274
+ remaining_target -= 1
275
+
276
+ # Select indices while preserving temporal order
277
+ selected_indices = []
278
+ class_taken = {cls: 0 for cls in unique_classes}
279
+
280
+ for idx in indices: # indices are already in temporal order
281
+ label = y[idx]
282
+ if class_taken[label] < target_per_class[label]:
283
+ selected_indices.append(idx)
284
+ class_taken[label] += 1
285
+
286
+ # Stop if we've reached our target
287
+ if len(selected_indices) >= target_size:
288
+ break
289
+
290
+ return np.array(selected_indices)
291
+
292
+ def get_n_splits(self, X=None, y=None, groups=None):
293
+ """Returns the number of splitting iterations in the CV."""
294
+ return self.n_splits
295
+
296
+ def _find_contiguous_segments(self, indices):
297
+ """Find contiguous segments in a sorted array of indices."""
298
+ if len(indices) == 0:
299
+ return []
300
+
301
+ sorted_indices = np.sort(indices)
302
+ segments = []
303
+ start = sorted_indices[0]
304
+ end = sorted_indices[0]
305
+
306
+ for i in range(1, len(sorted_indices)):
307
+ if sorted_indices[i] == end + 1:
308
+ end = sorted_indices[i]
309
+ else:
310
+ segments.append((start, end))
311
+ start = sorted_indices[i]
312
+ end = sorted_indices[i]
313
+
314
+ segments.append((start, end))
315
+ return segments
316
+
317
+ def plot_splits(self, X, y=None, timestamps=None, figsize=(12, 6), save_path=None):
318
+ """
319
+ Visualize the stratified time series splits.
320
+
321
+ Shows train (blue), validation (green), and test (red) sets.
322
+ When val_ratio=0, only shows train and test.
323
+
324
+ Parameters
325
+ ----------
326
+ X : array-like
327
+ Training data
328
+ y : array-like, optional
329
+ Target variable
330
+ timestamps : array-like, optional
331
+ Timestamps (if None, uses sample indices)
332
+ figsize : tuple, default (12, 6)
333
+ Figure size
334
+ save_path : str, optional
335
+ Path to save the plot
336
+
337
+ Returns
338
+ -------
339
+ fig : matplotlib.figure.Figure
340
+ The created figure
341
+ """
342
+ # Use sample indices if no timestamps provided
343
+ if timestamps is None:
344
+ timestamps = np.arange(len(X))
345
+
346
+ # Create figure
347
+ fig, ax = plt.subplots(figsize=figsize)
348
+
349
+ # Check if we have validation sets
350
+ if self.val_ratio > 0:
351
+ # Use split_with_val for 3-way splits
352
+ splits = list(self.split_with_val(X, y, timestamps))
353
+ split_type = "train-val-test"
354
+ else:
355
+ # Use regular split for 2-way splits
356
+ splits = list(self.split(X, y, timestamps))
357
+ split_type = "train-test"
358
+
359
+ if not splits:
360
+ raise ValueError("No splits generated")
361
+
362
+ # Plot each fold
363
+ for fold, split_indices in enumerate(splits):
364
+ y_pos = fold
365
+
366
+ if len(split_indices) == 3: # train, val, test
367
+ train_idx, val_idx, test_idx = split_indices
368
+
369
+ # Train set (blue) - plot as individual segments if non-contiguous
370
+ if len(train_idx) > 0:
371
+ # Find contiguous segments in train indices
372
+ train_segments = self._find_contiguous_segments(train_idx)
373
+ for start_idx, end_idx in train_segments:
374
+ train_rect = patches.Rectangle(
375
+ (start_idx, y_pos - 0.3),
376
+ end_idx - start_idx + 1, 0.6,
377
+ linewidth=1, edgecolor='blue', facecolor='lightblue', alpha=0.7,
378
+ label='Train' if fold == 0 and start_idx == train_segments[0][0] else ""
379
+ )
380
+ ax.add_patch(train_rect)
381
+
382
+ # Validation set (green) - plot as individual segments if non-contiguous
383
+ if len(val_idx) > 0:
384
+ val_segments = self._find_contiguous_segments(val_idx)
385
+ for start_idx, end_idx in val_segments:
386
+ val_rect = patches.Rectangle(
387
+ (start_idx, y_pos - 0.3),
388
+ end_idx - start_idx + 1, 0.6,
389
+ linewidth=1, edgecolor='green', facecolor='lightgreen', alpha=0.7,
390
+ label='Validation' if fold == 0 and start_idx == val_segments[0][0] else ""
391
+ )
392
+ ax.add_patch(val_rect)
393
+
394
+ # Test set (red) - plot as individual segments if non-contiguous
395
+ if len(test_idx) > 0:
396
+ test_segments = self._find_contiguous_segments(test_idx)
397
+ for start_idx, end_idx in test_segments:
398
+ test_rect = patches.Rectangle(
399
+ (start_idx, y_pos - 0.3),
400
+ end_idx - start_idx + 1, 0.6,
401
+ linewidth=1, edgecolor='red', facecolor='lightcoral', alpha=0.7,
402
+ label='Test' if fold == 0 and start_idx == test_segments[0][0] else ""
403
+ )
404
+ ax.add_patch(test_rect)
405
+
406
+ else: # train, test (2-way split)
407
+ train_idx, test_idx = split_indices
408
+
409
+ # Train set (blue) - plot as individual segments if non-contiguous
410
+ if len(train_idx) > 0:
411
+ train_segments = self._find_contiguous_segments(train_idx)
412
+ for start_idx, end_idx in train_segments:
413
+ train_rect = patches.Rectangle(
414
+ (start_idx, y_pos - 0.3),
415
+ end_idx - start_idx + 1, 0.6,
416
+ linewidth=1, edgecolor='blue', facecolor='lightblue', alpha=0.7,
417
+ label='Train' if fold == 0 and start_idx == train_segments[0][0] else ""
418
+ )
419
+ ax.add_patch(train_rect)
420
+
421
+ # Test set (red) - plot as individual segments if non-contiguous
422
+ if len(test_idx) > 0:
423
+ test_segments = self._find_contiguous_segments(test_idx)
424
+ for start_idx, end_idx in test_segments:
425
+ test_rect = patches.Rectangle(
426
+ (start_idx, y_pos - 0.3),
427
+ end_idx - start_idx + 1, 0.6,
428
+ linewidth=1, edgecolor='red', facecolor='lightcoral', alpha=0.7,
429
+ label='Test' if fold == 0 and start_idx == test_segments[0][0] else ""
430
+ )
431
+ ax.add_patch(test_rect)
432
+
433
+ # Add scatter plots of actual data points with jittering
434
+ np.random.seed(42) # For reproducible jittering
435
+ jitter_strength = 0.15 # Amount of vertical jittering
436
+
437
+ for fold, split_indices in enumerate(splits):
438
+ y_pos = fold
439
+
440
+ if len(split_indices) == 3: # train, val, test
441
+ train_idx, val_idx, test_idx = split_indices
442
+
443
+ # Add jittered scatter plots for 3-way split
444
+ if len(train_idx) > 0:
445
+ train_jitter = np.random.normal(0, jitter_strength, len(train_idx))
446
+ ax.scatter(train_idx, y_pos + train_jitter,
447
+ c='darkblue', s=15, alpha=0.6, marker='o',
448
+ label='Train points' if fold == 0 else '', zorder=3)
449
+
450
+ if len(val_idx) > 0:
451
+ val_jitter = np.random.normal(0, jitter_strength, len(val_idx))
452
+ ax.scatter(val_idx, y_pos + val_jitter,
453
+ c='darkgreen', s=15, alpha=0.6, marker='^',
454
+ label='Val points' if fold == 0 else '', zorder=3)
455
+
456
+ if len(test_idx) > 0:
457
+ test_jitter = np.random.normal(0, jitter_strength, len(test_idx))
458
+ ax.scatter(test_idx, y_pos + test_jitter,
459
+ c='darkred', s=15, alpha=0.6, marker='s',
460
+ label='Test points' if fold == 0 else '', zorder=3)
461
+
462
+ else: # train, test (2-way split)
463
+ train_idx, test_idx = split_indices
464
+
465
+ # Add jittered scatter plots for 2-way split
466
+ if len(train_idx) > 0:
467
+ train_jitter = np.random.normal(0, jitter_strength, len(train_idx))
468
+ ax.scatter(train_idx, y_pos + train_jitter,
469
+ c='darkblue', s=15, alpha=0.6, marker='o',
470
+ label='Train points' if fold == 0 else '', zorder=3)
471
+
472
+ if len(test_idx) > 0:
473
+ test_jitter = np.random.normal(0, jitter_strength, len(test_idx))
474
+ ax.scatter(test_idx, y_pos + test_jitter,
475
+ c='darkred', s=15, alpha=0.6, marker='s',
476
+ label='Test points' if fold == 0 else '', zorder=3)
477
+
478
+ # Format plot
479
+ ax.set_ylim(-0.5, len(splits) - 0.5)
480
+ ax.set_xlim(0, len(X))
481
+ ax.set_xlabel('Sample Index (original order)')
482
+ ax.set_ylabel('Fold')
483
+
484
+ title = f'Time Series Stratified Split Visualization ({split_type})'
485
+ if self.stratify:
486
+ title += '\nMaintains class balance across splits'
487
+ if self.gap > 0:
488
+ title += f', Gap: {self.gap} samples'
489
+ title += '\nRectangles show ranges, dots show actual data points'
490
+ ax.set_title(title)
491
+
492
+ # Set y-ticks
493
+ ax.set_yticks(range(len(splits)))
494
+ ax.set_yticklabels([f'Fold {i}' for i in range(len(splits))])
495
+
496
+ # Add legend with scatter points
497
+ ax.legend(loc='upper right')
498
+
499
+ plt.tight_layout()
500
+
501
+ if save_path:
502
+ fig.savefig(save_path, dpi=150, bbox_inches='tight')
503
+
504
+ return fig
505
+
506
+
507
+ """Functions & Classes"""
508
+ def main(args) -> int:
509
+ """Demonstrate TimeSeriesStratifiedSplit functionality.
510
+
511
+ Args:
512
+ args: Command line arguments
513
+
514
+ Returns:
515
+ int: Exit status
516
+ """
517
+ logger.info("Demonstrating TimeSeriesStratifiedSplit functionality")
518
+
519
+ # Generate test data
520
+ np.random.seed(42)
521
+ n_samples = 200
522
+ X = np.random.randn(n_samples, 5)
523
+ y = np.random.randint(0, 2, n_samples)
524
+ timestamps = np.arange(n_samples) + np.random.normal(0, 0.1, n_samples)
525
+
526
+ logger.info(f"Generated test data: {n_samples} samples, {X.shape[1]} features, {len(np.unique(y))} classes")
527
+
528
+ # Test regular split
529
+ logger.info("Testing regular train/test split")
530
+ splitter = TimeSeriesStratifiedSplit(n_splits=3, test_ratio=0.2, gap=5)
531
+ for fold, (train_idx, test_idx) in enumerate(splitter.split(X, y, timestamps)):
532
+ logger.info(f"Fold {fold}: Train={len(train_idx)}, Test={len(test_idx)}")
533
+
534
+ # Test split with validation
535
+ logger.info("Testing train/validation/test split")
536
+ splitter_val = TimeSeriesStratifiedSplit(n_splits=2, test_ratio=0.2, val_ratio=0.15, gap=3)
537
+ for fold, (train_idx, val_idx, test_idx) in enumerate(splitter_val.split_with_val(X, y, timestamps)):
538
+ logger.info(f"Fold {fold}: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}")
539
+
540
+ # Check temporal order
541
+ train_times = timestamps[train_idx]
542
+ val_times = timestamps[val_idx] if len(val_idx) > 0 else np.array([])
543
+ test_times = timestamps[test_idx] if len(test_idx) > 0 else np.array([])
544
+
545
+ temporal_ok = True
546
+ if len(val_times) > 0 and len(test_times) > 0:
547
+ temporal_ok = (train_times.max() < val_times.min()) and (val_times.max() < test_times.min())
548
+ elif len(test_times) > 0:
549
+ temporal_ok = train_times.max() < test_times.min()
550
+
551
+ status = "✓" if temporal_ok else "✗"
552
+ logger.info(f" Temporal order: {status}")
553
+
554
+ # Generate visualization
555
+ logger.info("Generating split visualization")
556
+ fig = splitter_val.plot_splits(X, y, timestamps)
557
+
558
+ # Save using SciTeX framework
559
+ stx.io.save(fig, "./stratified_splits_demo.png", symlink_from_cwd=True)
560
+ plt.close(fig)
561
+
562
+ logger.info("TimeSeriesStratifiedSplit demonstration completed successfully")
563
+ return 0
564
+
565
+
566
+ def parse_args() -> argparse.Namespace:
567
+ """Parse command line arguments."""
568
+ import argparse
569
+ parser = argparse.ArgumentParser(
570
+ description='Demonstrate TimeSeriesStratifiedSplit with temporal integrity validation'
571
+ )
572
+ args = parser.parse_args()
573
+ return args
574
+
575
+
576
+ def run_main() -> None:
577
+ """Initialize scitex framework, run main function, and cleanup."""
578
+ global CONFIG, CC, sys, plt, rng
579
+
580
+ import sys
581
+ import matplotlib.pyplot as plt
582
+ import scitex as stx
583
+
584
+ args = parse_args()
585
+
586
+ CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
587
+ sys,
588
+ plt,
589
+ args=args,
590
+ file=__FILE__,
591
+ sdir_suffix=None,
592
+ verbose=False,
593
+ agg=True,
594
+ )
595
+
596
+ exit_status = main(args)
597
+
598
+ stx.session.close(
599
+ CONFIG,
600
+ verbose=False,
601
+ notify=False,
602
+ message="",
603
+ exit_status=exit_status,
604
+ )
605
+
606
+
607
+ if __name__ == '__main__':
608
+ run_main()
609
+
610
+ # EOF