scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,616 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-07-12 05:30:00 (ywatanabe)"
4
+ # File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/io/utils/h5_to_zarr.py
5
+ # ----------------------------------------
6
+ import os
7
+ __FILE__ = __file__
8
+ __DIR__ = os.path.dirname(__FILE__)
9
+ # ----------------------------------------
10
+
11
+ THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/io/utils/h5_to_zarr.py"
12
+
13
+ """
14
+ 1. Functionality:
15
+ - Migrates HDF5 files to Zarr format
16
+ - Preserves hierarchical structure and attributes
17
+ - Supports batch migration of multiple files
18
+ 2. Input:
19
+ - HDF5 file path(s)
20
+ - Optional Zarr output path(s)
21
+ - Migration options (compression, chunking)
22
+ 3. Output:
23
+ - Zarr store(s) with migrated data
24
+ 4. Prerequisites:
25
+ - h5py, zarr, numpy
26
+ """
27
+
28
+ """Imports"""
29
+ import h5py
30
+ import zarr
31
+ import numpy as np
32
+ import os
33
+ from pathlib import Path
34
+ from typing import Optional, Union, Dict, Any, List, Tuple
35
+ import warnings
36
+ from tqdm import tqdm
37
+
38
+ from ...errors import (
39
+ IOError as SciTeXIOError,
40
+ FileFormatError,
41
+ PathNotFoundError,
42
+ check_file_exists,
43
+ check_path,
44
+ warn_data_loss,
45
+ warn_performance
46
+ )
47
+
48
+
49
def _get_zarr_compressor(compressor: Optional[Union[str, Any]] = "zstd") -> Optional[Any]:
    """
    Resolve a compressor specification into a numcodecs codec object.

    Parameters
    ----------
    compressor : str, codec object or None
        ``None`` and any non-string object are handed back unchanged.
        A string is matched case-insensitively against
        'zstd', 'lz4', 'gzip' and 'blosc'.

    Returns
    -------
    codec object or None
        The codec for the requested name. A name that is not recognised
        falls back to ``Zstd(level=3)``.
    """
    # Nothing to resolve: either "no compression" or an already-built codec.
    if compressor is None or not isinstance(compressor, str):
        return compressor

    from numcodecs import Zstd, LZ4, GZip, Blosc

    # Name -> zero-argument factory with the preset used by this module.
    factories = {
        "zstd": lambda: Zstd(level=3),
        "lz4": lambda: LZ4(acceleration=1),
        "gzip": lambda: GZip(level=5),
        "blosc": lambda: Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE),
    }

    build = factories.get(compressor.lower())
    if build is None:
        return Zstd(level=3)
    return build()
67
+
68
+
69
def _infer_chunks(shape: Tuple[int, ...], dtype: np.dtype,
                  target_chunk_mb: float = 10.0) -> Optional[Tuple[int, ...]]:
    """
    Infer a reasonable chunk shape from an array's shape and dtype.

    The element budget (``target_chunk_mb`` worth of items) is spent on the
    leading dimensions first; once the budget is used up, every remaining
    dimension gets a chunk length of 1.

    Parameters
    ----------
    shape : tuple of int
        Shape of the array to be chunked.
    dtype : numpy dtype (or anything ``np.dtype`` accepts)
        Element type; only its item size is used.
    target_chunk_mb : float, optional
        Target size of one chunk in MiB.

    Returns
    -------
    tuple of int or None
        Chunk shape with the same rank as ``shape``; ``None`` for a scalar
        (zero-dimensional) shape. Every chunk length is at least 1, also
        for zero-length dimensions.
    """
    if len(shape) == 0:  # Scalar
        return None

    # Guard against zero-size dtypes so the budget computation cannot divide by 0.
    bytes_per_element = max(np.dtype(dtype).itemsize, 1)

    # Number of elements that still fit in the target chunk size.
    remaining_elements = (target_chunk_mb * 1024 * 1024) / bytes_per_element

    chunks = []
    for dim_size in shape:
        if remaining_elements <= 1:
            chunks.append(1)
            continue

        # Clamp to >= 1: a zero-length dimension would otherwise yield a
        # chunk length of 0 and a ZeroDivisionError on the next line.
        chunk_dim = max(1, min(dim_size, int(remaining_elements)))
        chunks.append(chunk_dim)
        remaining_elements = remaining_elements / chunk_dim

    return tuple(chunks)
94
+
95
+
96
def _copy_h5_attributes(h5_obj: "Union[h5py.Group, h5py.Dataset]",
                        zarr_obj: "Union[zarr.Group, zarr.Array]") -> None:
    """
    Copy attributes from an HDF5 object to a Zarr object.

    Each value is first converted to plain Python data (see
    ``_plain_attr_value``) so that numeric arrays, NumPy scalars and
    byte-string arrays of any rank can be stored instead of being dropped.
    An attribute that still cannot be stored is skipped with a warning;
    copying continues with the remaining attributes.

    Parameters
    ----------
    h5_obj : h5py.Group or h5py.Dataset
        Source object; only its ``attrs`` mapping is read.
    zarr_obj : zarr.Group or zarr.Array
        Destination object; only its ``attrs`` mapping is written.
    """
    # NOTE: annotations are quoted so they are not evaluated at definition time.
    for key, value in h5_obj.attrs.items():
        try:
            zarr_obj.attrs[key] = _plain_attr_value(value)
        except Exception as e:
            # Best effort: one unstorable attribute must not abort the migration.
            warnings.warn(f"Could not copy attribute '{key}': {e}")


def _plain_attr_value(value: Any) -> Any:
    """Convert an HDF5 attribute value into plain Python data (str, numbers, lists)."""
    if isinstance(value, bytes):
        return value.decode('utf-8', errors='replace')

    if isinstance(value, np.ndarray):
        if value.dtype.kind == 'S':
            # Byte-string array of any rank -> (nested) list of str
            return np.char.decode(value, 'utf-8', 'replace').tolist()
        if value.dtype.kind == 'O':
            # Elements may themselves be bytes or NumPy scalars
            return _plain_attr_value(value.tolist())
        return value.tolist()

    if isinstance(value, np.generic):
        return value.item()  # NumPy scalar (int, float, bool, ...) -> Python type

    if isinstance(value, (list, tuple)):
        return [_plain_attr_value(item) for item in value]

    return value
113
+
114
+
115
def _migrate_dataset(h5_dataset: h5py.Dataset,
                     zarr_parent: zarr.Group,
                     name: str,
                     compressor: Optional[Any],
                     chunks: Optional[Union[bool, Tuple[int, ...]]] = True,
                     show_progress: bool = False) -> Optional[zarr.Array]:
    """
    Migrate a single HDF5 dataset to Zarr.

    Parameters
    ----------
    h5_dataset : h5py.Dataset
        Source dataset.
    zarr_parent : zarr.Group
        Group in which the new array is created.
    name : str
        Name of the array inside ``zarr_parent``.
    compressor : codec object or None
        Compressor for numeric and pickled data. String data is stored
        without a compressor.
    chunks : bool, tuple or None, optional
        True infers chunks with ``_infer_chunks``; False requests a single
        chunk spanning the whole array; a tuple is used as the chunk shape
        (trimmed to the dataset's rank); None leaves the choice to zarr.
    show_progress : bool, optional
        Print a line before copying arrays with more than 1e6 elements.

    Returns
    -------
    zarr.Array or None
        The created array, or None when the dataset's shape/dtype cannot be
        read (treated as corrupted; a warning is emitted).

    Raises
    ------
    SciTeXIOError
        If an object-dtype dataset cannot be converted.
    """
    # Reading shape/dtype is the cheapest way to detect an unreadable dataset.
    try:
        shape = h5_dataset.shape
        dtype = h5_dataset.dtype
    except Exception as e:
        warnings.warn(f"Skipping corrupted dataset '{name}': {e}")
        return None

    if dtype.kind == 'O':  # Object dtype
        warn_data_loss(
            f"Dataset '{name}'",
            "Object dtype will be converted to string or pickled"
        )
        try:
            zarr_array = _migrate_object_dataset(
                h5_dataset, zarr_parent, name, shape, compressor
            )
        except Exception as e:
            raise SciTeXIOError(
                f"Failed to migrate object dtype dataset '{name}'",
                context={"error": str(e)},
                suggestion="Consider converting object arrays before migration"
            ) from e
    else:
        # Regular array
        if show_progress and shape and np.prod(shape) > 1e6:
            print(f" Migrating large dataset '{name}' {shape} {dtype}...")

        zarr_array = zarr_parent.create_dataset(
            name,
            shape=shape,
            dtype=dtype,
            chunks=_resolve_dataset_chunks(chunks, shape, dtype),
            compressor=compressor
        )

        # Copy data; a failed copy keeps the (empty) array and only warns.
        try:
            if not shape:  # Scalar
                zarr_array[()] = h5_dataset[()]
            elif np.prod(shape) > 0:
                zarr_array[:] = h5_dataset[:]
        except Exception as e:
            warnings.warn(f"Error copying data for dataset '{name}': {e}. Leaving empty.")

    _copy_h5_attributes(h5_dataset, zarr_array)

    return zarr_array


def _resolve_dataset_chunks(chunks, shape, dtype):
    """Translate the user-facing ``chunks`` option into the value handed to zarr."""
    # Scalars and arrays with a zero-length axis: let zarr pick the chunking.
    if chunks is None or not shape or not all(shape):
        return None
    if chunks is True:
        return _infer_chunks(shape, dtype)
    if chunks is False:
        # zarr: False means one chunk covering the whole array, whereas None
        # means "guess", which is not what "no chunks" asks for.
        return False
    if isinstance(chunks, int):
        return chunks
    # One chunk spec is applied to every dataset in the file; drop surplus
    # trailing entries so lower-rank datasets are not rejected by zarr.
    return tuple(chunks)[:len(shape)]


def _migrate_object_dataset(h5_dataset, zarr_parent, name, shape, compressor):
    """Store an object-dtype dataset as a string array or as pickled bytes."""
    # NOTE(security): pickled payloads must only be un-pickled from trusted stores.
    import pickle

    def _as_text(item):
        return str(item) if isinstance(item, str) else item.decode('utf-8', errors='replace')

    def _store_pickled(payload, marker):
        blob = pickle.dumps(payload)
        pickled_array = zarr_parent.create_dataset(
            name,
            data=np.frombuffer(blob, dtype=np.uint8),
            compressor=compressor
        )
        # Marker that tells readers the bytes are a pickle, not numeric data.
        pickled_array.attrs["_type"] = marker
        return pickled_array

    if not shape:  # Scalar dataset
        value = h5_dataset[()]
        if isinstance(value, (bytes, str)):
            # String scalar - store as 0-d string array
            return zarr_parent.create_dataset(
                name,
                data=_as_text(value),
                dtype=str,
                compressor=None
            )
        return _store_pickled(value, "pickled_scalar")

    if len(h5_dataset) > 0:  # Non-empty array
        first_elem = h5_dataset[0]
        if isinstance(first_elem, (bytes, str)):
            # String data - the first element decides the representation
            data = np.array([_as_text(item) for item in h5_dataset[:]])
            return zarr_parent.create_dataset(
                name,
                data=data,
                dtype=data.dtype,
                compressor=None
            )
        return _store_pickled(h5_dataset[:], "pickled")

    # Empty dataset - use empty string array
    return zarr_parent.create_dataset(
        name,
        shape=shape,
        dtype='U1',
        fill_value=''
    )
241
+
242
+
243
def _migrate_group(h5_group: h5py.Group,
                   zarr_parent: zarr.Group,
                   compressor: Optional[Any],
                   chunks: Optional[Union[bool, Tuple[int, ...]]] = True,
                   show_progress: bool = False,
                   _level: int = 0) -> None:
    """
    Recursively copy an HDF5 group into an existing Zarr group.

    The group's attributes are copied first. Each member is then handled by
    kind: datasets go through ``_migrate_dataset``, sub-groups are created in
    ``zarr_parent`` and migrated recursively, and anything else is skipped
    with a warning. Members that cannot be accessed are skipped with a
    warning as well.

    Parameters
    ----------
    h5_group : h5py.Group
        Source group.
    zarr_parent : zarr.Group
        Destination group that receives the members.
    compressor, chunks, show_progress
        Passed through to ``_migrate_dataset``.
    _level : int
        Current recursion depth; group progress lines are printed only for
        depths below 2.
    """
    _copy_h5_attributes(h5_group, zarr_parent)

    try:
        member_names = list(h5_group.keys())
    except Exception as e:
        warnings.warn(f"Cannot access group keys: {e}")
        return

    for member_name in member_names:
        try:
            member = h5_group[member_name]
        except Exception as e:
            warnings.warn(f"Cannot access item '{member_name}': {e}")
            continue

        if isinstance(member, h5py.Dataset):
            migrated = _migrate_dataset(
                member, zarr_parent, member_name, compressor, chunks, show_progress
            )
            if migrated is None:
                print(f" Warning: Skipped corrupted dataset '{member_name}'")
            continue

        if not isinstance(member, h5py.Group):
            warnings.warn(f"Unknown HDF5 object type for '{member_name}': {type(member)}")
            continue

        # Sub-group: announce (top levels only), create, then descend.
        if show_progress and _level < 2:
            print(f"{' ' * _level}Migrating group '{member_name}'...")

        child_group = zarr_parent.create_group(member_name)
        _migrate_group(member, child_group, compressor, chunks,
                       show_progress, _level + 1)
284
+
285
+
286
def migrate_h5_to_zarr(h5_path: Union[str, Path],
                       zarr_path: Optional[Union[str, Path]] = None,
                       compressor: Optional[Union[str, Any]] = "zstd",
                       chunks: Optional[Union[bool, Tuple[int, ...]]] = True,
                       overwrite: bool = False,
                       show_progress: bool = True,
                       validate: bool = True) -> str:
    """
    Migrate HDF5 file to Zarr format.

    Parameters
    ----------
    h5_path : str or Path
        Path to input HDF5 file
    zarr_path : str or Path, optional
        Path for output Zarr store. If None, uses h5_path with .zarr extension
    compressor : str or compressor object, optional
        Compression to use: 'zstd', 'lz4', 'gzip', 'blosc', or None
    chunks : bool or tuple, optional
        Chunking strategy. True for auto, False for no chunks, or specific shape
    overwrite : bool, optional
        Whether to overwrite existing Zarr store
    show_progress : bool, optional
        Whether to show migration progress
    validate : bool, optional
        Whether to validate the migration by comparing shapes

    Returns
    -------
    str
        Path to created Zarr store

    Raises
    ------
    PathNotFoundError
        If HDF5 file doesn't exist
    FileFormatError
        If input is not a valid HDF5 file
    SciTeXIOError
        If the target exists and ``overwrite`` is False, if the file cannot
        be opened, or if migration/validation fails. Errors already raised
        as SciTeXIOError or FileFormatError propagate unchanged; other
        exceptions are wrapped with the original attached as ``__cause__``.

    Examples
    --------
    >>> # Basic migration
    >>> migrate_h5_to_zarr("data.h5")

    >>> # Custom output and compression
    >>> migrate_h5_to_zarr("data.h5", "output.zarr", compressor="lz4")

    >>> # Specific chunking
    >>> migrate_h5_to_zarr("large_data.h5", chunks=(100, 100, 10))
    """
    # Validate input path (absolute paths only get an existence check)
    h5_path = Path(h5_path)
    if not h5_path.is_absolute():
        check_file_exists(str(h5_path))
    elif not h5_path.exists():
        raise PathNotFoundError(str(h5_path))

    # Determine output path
    if zarr_path is None:
        zarr_path = h5_path.with_suffix('.zarr')
    else:
        zarr_path = Path(zarr_path)
        if not zarr_path.is_absolute():
            check_path(str(zarr_path))

    if zarr_path.exists() and not overwrite:
        raise SciTeXIOError(
            f"Zarr store already exists: {zarr_path}",
            suggestion="Use overwrite=True to replace existing store"
        )

    compressor_obj = _get_zarr_compressor(compressor)

    if show_progress:
        print(f"Migrating HDF5 to Zarr:")
        print(f" Source: {h5_path}")
        print(f" Target: {zarr_path}")
        print(f" Compressor: {compressor}")

    def _translate(err: Exception, opening: bool) -> Exception:
        """Map a low-level failure onto the exception type promised to callers."""
        if isinstance(err, OSError):
            text = str(err)
            if "Unable to open file" in text or "bad symbol table" in text:
                warnings.warn(f"HDF5 file appears to be corrupted: {h5_path}")
                return FileFormatError(
                    str(h5_path),
                    expected_format="HDF5",
                    actual_format="corrupted HDF5"
                )
            if opening:
                return SciTeXIOError(
                    f"Failed to open HDF5 file: {h5_path}",
                    context={"error": str(err)}
                )
        return SciTeXIOError(
            f"Migration failed: {str(err)}",
            context={"h5_path": str(h5_path), "zarr_path": str(zarr_path)},
            suggestion="Check file permissions and disk space"
        )

    # Open the source on its own so that only genuine open failures are
    # reported as "Failed to open HDF5 file".
    try:
        h5_file = h5py.File(str(h5_path), 'r')
    except Exception as e:
        raise _translate(e, opening=True) from e

    with h5_file:
        try:
            # overwrite is necessarily True here if the target still exists
            if zarr_path.exists():
                import shutil
                if zarr_path.is_dir() and not zarr_path.is_symlink():
                    shutil.rmtree(zarr_path)
                else:
                    zarr_path.unlink()  # plain file or link: rmtree would fail

            zarr_store = zarr.open(str(zarr_path), mode='w')

            # Migrate root attributes
            _copy_h5_attributes(h5_file, zarr_store)

            # Migrate all groups and datasets
            _migrate_group(h5_file, zarr_store, compressor_obj, chunks, show_progress)

            if show_progress:
                print("Migration complete!")

            if validate:
                if show_progress:
                    print("Validating migration...")
                _validate_migration(h5_file, zarr_store, show_progress)

        except (SciTeXIOError, FileFormatError):
            # Already carries a precise message/context; do not re-wrap.
            raise
        except Exception as e:
            raise _translate(e, opening=False) from e

    return str(zarr_path)
420
+
421
+
422
def _validate_migration(h5_file: h5py.File, zarr_store: zarr.Group,
                        show_progress: bool = False) -> None:
    """
    Validate that migration preserved data structure.

    Walks the HDF5 tree and the Zarr tree in parallel. For dataset/array
    pairs the shapes must match and differing non-object dtypes produce a
    warning; for group pairs the key sets must match and members are checked
    recursively. Arrays marked with ``_type`` "pickled" / "pickled_scalar"
    hold a flat uint8 pickle buffer, so their shape is intentionally not
    compared with the source shape.

    Parameters
    ----------
    h5_file : h5py.File
        Open source file.
    zarr_store : zarr.Group
        Root group of the migrated store.
    show_progress : bool, optional
        Print a confirmation line when validation succeeds.

    Raises
    ------
    SciTeXIOError
        On a shape mismatch or a key mismatch.
    """
    pickled_markers = ("pickled", "pickled_scalar")

    def validate_item(h5_item, zarr_item, path=""):
        if isinstance(h5_item, h5py.Dataset) and isinstance(zarr_item, zarr.Array):
            # Pickled payloads are serialized bytes; their length is unrelated
            # to the original dataset shape.
            stored_as_pickle = zarr_item.attrs.get("_type") in pickled_markers

            if not stored_as_pickle and h5_item.shape != zarr_item.shape:
                raise SciTeXIOError(
                    f"Shape mismatch at {path}",
                    context={
                        "h5_shape": h5_item.shape,
                        "zarr_shape": zarr_item.shape
                    }
                )

            # Compare dtypes (approximately): object dtypes are converted on
            # purpose, so only non-object pairs are compared.
            if h5_item.dtype.kind != 'O' and zarr_item.dtype.kind != 'O':
                if h5_item.dtype != zarr_item.dtype:
                    warnings.warn(
                        f"Dtype mismatch at {path}: "
                        f"HDF5={h5_item.dtype}, Zarr={zarr_item.dtype}"
                    )

        elif isinstance(h5_item, h5py.Group) and isinstance(zarr_item, zarr.Group):
            h5_keys = set(h5_item.keys())
            zarr_keys = set(zarr_item.keys())

            if h5_keys != zarr_keys:
                raise SciTeXIOError(
                    f"Key mismatch at {path}",
                    context={
                        "h5_only": h5_keys - zarr_keys,
                        "zarr_only": zarr_keys - h5_keys
                    }
                )

            for key in h5_keys:
                validate_item(h5_item[key], zarr_item[key], f"{path}/{key}")

    validate_item(h5_file, zarr_store)

    if show_progress:
        print(" Validation passed ✓")
466
+
467
+
468
def migrate_h5_to_zarr_batch(h5_paths: List[Union[str, Path]],
                             output_dir: Optional[Union[str, Path]] = None,
                             compressor: Optional[Union[str, Any]] = "zstd",
                             chunks: Optional[Union[bool, Tuple[int, ...]]] = True,
                             overwrite: bool = False,
                             parallel: bool = False,
                             n_workers: Optional[int] = None) -> List[str]:
    """
    Migrate several HDF5 files to Zarr, sequentially or in worker processes.

    Parameters
    ----------
    h5_paths : list of str or Path
        HDF5 files to migrate
    output_dir : str or Path, optional
        Directory that receives the Zarr stores (created if missing).
        If None, each store is written next to its HDF5 file
    compressor : str or compressor object, optional
        Compression to use
    chunks : bool or tuple, optional
        Chunking strategy
    overwrite : bool, optional
        Whether to overwrite existing Zarr stores
    parallel : bool, optional
        Use a process pool when more than one file is given
    n_workers : int, optional
        Number of worker processes (defaults to the CPU count, capped at
        the number of files)

    Returns
    -------
    list of str
        Paths of the stores that were migrated successfully, in input
        order. Files that failed are reported on stdout and left out.

    Examples
    --------
    >>> import glob
    >>> h5_files = glob.glob("data/*.h5")
    >>> zarr_paths = migrate_h5_to_zarr_batch(h5_files)

    >>> zarr_paths = migrate_h5_to_zarr_batch(
    ...     h5_files,
    ...     output_dir="zarr_data/",
    ...     parallel=True
    ... )
    """
    h5_paths = [Path(p) for p in h5_paths]

    # Work out one target store per input file.
    zarr_paths = []
    for source in h5_paths:
        target = source.with_suffix('.zarr')
        if output_dir is not None:
            out_root = Path(output_dir)
            out_root.mkdir(parents=True, exist_ok=True)
            target = out_root / target.name
        zarr_paths.append(target)

    print(f"Migrating {len(h5_paths)} HDF5 files to Zarr format...")

    # Options shared by every single-file migration.
    options = dict(
        compressor=compressor,
        chunks=chunks,
        overwrite=overwrite,
        show_progress=False,
        validate=True,
    )

    if not (parallel and len(h5_paths) > 1):
        # Sequential processing
        migrated_paths = []
        for source, target in tqdm(
            zip(h5_paths, zarr_paths),
            total=len(h5_paths),
            desc="Migrating"
        ):
            try:
                migrated_paths.append(migrate_h5_to_zarr(source, target, **options))
            except Exception as e:
                print(f"\nError migrating {source}: {e}")
    else:
        # Parallel processing
        from concurrent.futures import ProcessPoolExecutor, as_completed

        if n_workers is None:
            n_workers = min(os.cpu_count() or 4, len(h5_paths))

        print(f"Using {n_workers} parallel workers...")

        # Slot per input keeps the output in input order; failures stay None.
        outcomes = [None] * len(h5_paths)

        with ProcessPoolExecutor(max_workers=n_workers) as executor:
            pending = {
                executor.submit(migrate_h5_to_zarr, source, target, **options): position
                for position, (source, target) in enumerate(zip(h5_paths, zarr_paths))
            }

            with tqdm(total=len(h5_paths), desc="Migrating") as pbar:
                for finished in as_completed(pending):
                    position = pending[finished]
                    try:
                        outcomes[position] = finished.result()
                    except Exception as e:
                        print(f"\nError migrating {h5_paths[position]}: {e}")
                    pbar.update(1)

        migrated_paths = [path for path in outcomes if path is not None]

    print(f"\nSuccessfully migrated {len(migrated_paths)}/{len(h5_paths)} files")

    return migrated_paths
596
+
597
+
598
+ # Example usage in docstring
599
if __name__ == "__main__":
    # Running this module directly does nothing; the snippets below only
    # illustrate typical calls.
    #
    # Single file, default settings:
    #     migrate_h5_to_zarr("data.h5")
    #
    # Explicit target, compressor and chunk shape:
    #     migrate_h5_to_zarr(
    #         "large_data.h5",
    #         "compressed_data.zarr",
    #         compressor="blosc",
    #         chunks=(100, 100, 10)
    #     )
    #
    # Every HDF5 file in the working directory, in parallel:
    #     import glob
    #     h5_files = glob.glob("*.h5")
    #     migrate_h5_to_zarr_batch(h5_files, parallel=True)
    pass
@@ -49,6 +49,10 @@ def geometric_median(xx, dim=-1):
49
49
 
50
50
 
51
51
  if __name__ == "__main__":
52
+ import sys
53
+ import matplotlib.pyplot as plt
54
+ import scitex
55
+
52
56
  # # Argument Parser
53
57
  # import argparse
54
58
  # parser = argparse.ArgumentParser(description='')
@@ -57,8 +61,8 @@ if __name__ == "__main__":
57
61
  # args = parser.parse_args()
58
62
 
59
63
  # Main
60
- CONFIG, sys.stdout, sys.stderr, plt, CC = scitex.gen.start(sys, plt, verbose=False)
64
+ CONFIG, sys.stdout, sys.stderr, plt, CC = scitex.session.start(sys, plt, verbose=False)
61
65
  main()
62
- scitex.gen.close(CONFIG, verbose=False, notify=False)
66
+ scitex.session.close(CONFIG, verbose=False, notify=False)
63
67
 
64
68
  # EOF