scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,18 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
- # Time-stamp: "2024-11-29 04:31:43 (ywatanabe)"
4
- # File: ./scitex_repo/src/scitex/db/_SQLite3Mixins/_QueryMixin.py
5
-
6
- THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/db/_SQLite3Mixins/_QueryMixin.py"
3
+ # Timestamp: "2025-09-11 05:49:14 (ywatanabe)"
4
+ # File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_SQLite3Mixins/_QueryMixin.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = __file__
9
+ __DIR__ = os.path.dirname(__FILE__)
10
+ # ----------------------------------------
7
11
 
8
12
  import sqlite3
9
13
  from typing import List, Tuple
10
14
 
11
15
  import pandas as pd
12
- from .._BaseMixins._BaseQueryMixin import _BaseQueryMixin
13
16
 
14
17
 
15
18
  class _QueryMixin:
@@ -18,34 +21,58 @@ class _QueryMixin:
18
21
  def _sanitize_parameters(self, parameters):
19
22
  """Convert pandas Timestamp objects to strings"""
20
23
  if isinstance(parameters, (list, tuple)):
21
- return [str(p) if isinstance(p, pd.Timestamp) else p for p in parameters]
24
+ return [
25
+ str(p) if isinstance(p, pd.Timestamp) else p
26
+ for p in parameters
27
+ ]
22
28
  return parameters
23
29
 
24
30
  def execute(self, query: str, parameters: Tuple = ()) -> None:
31
+ self.ensure_connection()
32
+ self._check_context_manager()
33
+
25
34
  if not self.cursor:
26
35
  raise ConnectionError("Database not connected")
27
36
 
28
37
  if any(
29
38
  keyword in query.upper()
30
- for keyword in ["INSERT", "UPDATE", "DELETE", "DROP", "CREATE", "ALTER"]
39
+ for keyword in [
40
+ "INSERT",
41
+ "UPDATE",
42
+ "DELETE",
43
+ "DROP",
44
+ "CREATE",
45
+ "ALTER",
46
+ ]
31
47
  ):
32
48
  self._check_writable()
33
49
 
34
50
  try:
35
51
  parameters = self._sanitize_parameters(parameters)
36
52
  self.cursor.execute(query, parameters)
37
- self.conn.commit()
53
+ if self.autocommit:
54
+ self.conn.commit()
55
+ self.cursor.execute("PRAGMA wal_checkpoint(PASSIVE)")
56
+ # self.cursor.execute("PRAGMA wal_checkpoint(FULL)")
38
57
  return self.cursor
39
58
  except sqlite3.Error as err:
40
59
  raise sqlite3.Error(f"Query execution failed: {err}")
41
60
 
42
61
  def executemany(self, query: str, parameters: List[Tuple]) -> None:
62
+ self.ensure_connection()
43
63
  if not self.cursor:
44
64
  raise ConnectionError("Database not connected")
45
65
 
46
66
  if any(
47
67
  keyword in query.upper()
48
- for keyword in ["INSERT", "UPDATE", "DELETE", "DROP", "CREATE", "ALTER"]
68
+ for keyword in [
69
+ "INSERT",
70
+ "UPDATE",
71
+ "DELETE",
72
+ "DROP",
73
+ "CREATE",
74
+ "ALTER",
75
+ ]
49
76
  ):
50
77
  self._check_writable()
51
78
 
@@ -57,6 +84,7 @@ class _QueryMixin:
57
84
  raise sqlite3.Error(f"Batch query execution failed: {err}")
58
85
 
59
86
  def executescript(self, script: str) -> None:
87
+ self.ensure_connection()
60
88
  if not self.cursor:
61
89
  raise ConnectionError("Database not connected")
62
90
 
@@ -79,5 +107,4 @@ class _QueryMixin:
79
107
  except sqlite3.Error as err:
80
108
  raise sqlite3.Error(f"Script execution failed: {err}")
81
109
 
82
-
83
110
  # EOF
@@ -1,15 +1,26 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-09-08 12:00:38 (ywatanabe)"
4
+ # File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_SQLite3Mixins/_RowMixin.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = __file__
9
+ __DIR__ = os.path.dirname(__FILE__)
10
+ # ----------------------------------------
11
+
3
12
  # Time-stamp: "2024-11-25 01:38:17 (ywatanabe)"
4
- # File: ./scitex_repo/src/scitex/db/_SQLite3Mixins/_RowMixin.py
13
+
14
+ from typing import Union
5
15
 
6
16
  THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/db/_SQLite3Mixins/_RowMixin.py"
7
17
 
8
18
  import sqlite3
9
- from typing import List
10
- from typing import Optional
19
+ from typing import List, Optional
20
+
11
21
  import pandas as pd
12
- from .._BaseMixins._BaseRowMixin import _BaseRowMixin
22
+
23
+ from ..._BaseMixins._BaseRowMixin import _BaseRowMixin
13
24
 
14
25
 
15
26
  class _RowMixin:
@@ -19,12 +30,27 @@ class _RowMixin:
19
30
  self,
20
31
  table_name: str,
21
32
  columns: List[str] = None,
33
+ ids: Union[int, List[int], str] = "all",
22
34
  where: str = None,
23
35
  order_by: str = None,
24
36
  limit: Optional[int] = None,
25
37
  offset: Optional[int] = None,
26
38
  return_as: str = "dataframe",
39
+ include_blobs: bool = True,
27
40
  ):
41
+ if not include_blobs:
42
+ if columns is None:
43
+ schema = self.get_table_schema(table_name)
44
+ columns = schema[
45
+ ~schema["type"].str.contains("BLOB", case=False)
46
+ ]["name"].tolist()
47
+ else:
48
+ schema = self.get_table_schema(table_name)
49
+ blob_columns = schema[
50
+ schema["type"].str.contains("BLOB", case=False)
51
+ ]["name"].tolist()
52
+ columns = [col for col in columns if col not in blob_columns]
53
+
28
54
  if columns is None:
29
55
  columns_str = "*"
30
56
  elif isinstance(columns, str):
@@ -32,6 +58,19 @@ class _RowMixin:
32
58
  else:
33
59
  columns_str = ", ".join(f'"{col}"' for col in columns)
34
60
 
61
+ # Handle ids parameter
62
+ if ids != "all":
63
+ if isinstance(ids, int):
64
+ id_where = f"id = {ids}"
65
+ else:
66
+ id_list = ",".join(map(str, ids))
67
+ id_where = f"id IN ({id_list})"
68
+
69
+ if where:
70
+ where = f"({where}) AND ({id_where})"
71
+ else:
72
+ where = id_where
73
+
35
74
  try:
36
75
  query_parts = [f"SELECT {columns_str} FROM {table_name}"]
37
76
 
@@ -47,7 +86,9 @@ class _RowMixin:
47
86
  query = " ".join(query_parts)
48
87
  self.cursor.execute(query)
49
88
 
50
- column_names = [description[0] for description in self.cursor.description]
89
+ column_names = [
90
+ description[0] for description in self.cursor.description
91
+ ]
51
92
  data = self.cursor.fetchall()
52
93
 
53
94
  if return_as == "list":
@@ -71,5 +112,4 @@ class _RowMixin:
71
112
  self.cursor.execute(query)
72
113
  return self.cursor.fetchone()[0]
73
114
 
74
-
75
115
  # EOF
@@ -1,19 +1,26 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-09-11 05:47:57 (ywatanabe)"
4
+ # File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_SQLite3Mixins/_TableMixin.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = __file__
9
+ __DIR__ = os.path.dirname(__FILE__)
10
+ # ----------------------------------------
11
+
3
12
  # Time-stamp: "2024-11-25 01:38:47 (ywatanabe)"
4
- # File: ./scitex_repo/src/scitex/db/_SQLite3Mixins/_TableMixin.py
5
13
 
6
14
  THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/db/_SQLite3Mixins/_TableMixin.py"
7
15
 
8
- #!/usr/bin/env python3
9
- # -*- coding: utf-8 -*-
10
16
  # Time-stamp: "2024-11-11 19:13:19 (ywatanabe)"
11
- # File: ./scitex_repo/src/scitex/db/_BaseSQLiteDB_modules/_TableMixin.py
12
17
 
13
18
  import sqlite3
14
19
  from typing import Any, Dict, List, Union
20
+
15
21
  import pandas as pd
16
- from .._BaseMixins._BaseTableMixin import _BaseTableMixin
22
+
23
+ from ..._BaseMixins._BaseTableMixin import _BaseTableMixin
17
24
 
18
25
 
19
26
  class _TableMixin:
@@ -38,6 +45,8 @@ class _TableMixin:
38
45
  [
39
46
  f"{col_name}_dtype TEXT DEFAULT 'unknown'",
40
47
  f"{col_name}_shape TEXT DEFAULT 'unknown'",
48
+ f"{col_name}_is_compressed BOOLEAN DEFAULT FALSE",
49
+ f"{col_name}_hash TEXT DEFAULT NULL",
41
50
  ]
42
51
  )
43
52
 
@@ -101,9 +110,7 @@ class _TableMixin:
101
110
  return
102
111
 
103
112
  try:
104
- query = (
105
- f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
106
- )
113
+ query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
107
114
  if default_value is not None:
108
115
  query += f" DEFAULT {default_value}"
109
116
  self.execute(query)
@@ -121,6 +128,18 @@ class _TableMixin:
121
128
  "TEXT",
122
129
  default_value="'unknown'",
123
130
  )
131
+ self.add_column(
132
+ table_name,
133
+ f"{column_name}_is_compressed",
134
+ "BOOLEAN",
135
+ default_value="FALSE",
136
+ )
137
+ self.add_column(
138
+ table_name,
139
+ f"{column_name}_hash",
140
+ "TEXT",
141
+ default_value="NULL",
142
+ )
124
143
 
125
144
  except sqlite3.OperationalError as err:
126
145
  raise ValueError(f"Failed to add column: {err}")
@@ -131,6 +150,29 @@ class _TableMixin:
131
150
  columns: Union[str, List[str]],
132
151
  if_exists: bool = True,
133
152
  ) -> None:
153
+ """
154
+ DEPRECATED: Use the new drop_columns method from _DropMixin for better compatibility.
155
+ This method will be removed in a future version.
156
+ """
157
+ import warnings
158
+ warnings.warn(
159
+ "TableMixin.drop_columns is deprecated. Use the enhanced drop_columns method "
160
+ "from DropMixin which handles SQLite version compatibility automatically.",
161
+ DeprecationWarning,
162
+ stacklevel=2
163
+ )
164
+
165
+ # Delegate to the new implementation if available
166
+ # Check all classes in MRO for the enhanced drop_columns method
167
+ for cls in self.__class__.__mro__:
168
+ if (hasattr(cls, 'drop_columns') and
169
+ hasattr(cls, '_supports_native_drop_column') and
170
+ cls.__name__ == '_DropMixin'):
171
+ # Call DropMixin's drop_columns directly
172
+ cls.drop_columns(self, table_name, columns, if_exists)
173
+ return
174
+
175
+ # Fallback to original implementation for compatibility
134
176
  with self.transaction():
135
177
  if isinstance(columns, str):
136
178
  columns = [columns]
@@ -146,15 +188,19 @@ class _TableMixin:
146
188
  return
147
189
 
148
190
  # Drop multiple columns in a single ALTER TABLE statement
149
- drop_clause = ", ".join(f"DROP COLUMN {col}" for col in columns_to_drop)
191
+ drop_clause = ", ".join(
192
+ f"DROP COLUMN {col}" for col in columns_to_drop
193
+ )
150
194
  self.execute(f"ALTER TABLE {table_name} {drop_clause}")
151
195
 
152
196
  def get_table_names(self) -> List[str]:
197
+ self.ensure_connection()
153
198
  query = "SELECT name FROM sqlite_master WHERE type='table'"
154
199
  self.cursor.execute(query)
155
200
  return [table[0] for table in self.cursor.fetchall()]
156
201
 
157
202
  def get_table_schema(self, table_name: str) -> pd.DataFrame:
203
+ self.ensure_connection()
158
204
  query = f"PRAGMA table_info({table_name})"
159
205
  self.cursor.execute(query)
160
206
  columns = ["cid", "name", "type", "notnull", "dflt_value", "pk"]
@@ -166,6 +212,7 @@ class _TableMixin:
166
212
  return pk_col[0] if len(pk_col) > 0 else None
167
213
 
168
214
  def get_table_stats(self, table_name: str) -> Dict[str, int]:
215
+ self.ensure_connection()
169
216
  try:
170
217
  pages = self.cursor.execute(f"PRAGMA page_count").fetchone()[0]
171
218
  page_size = self.cursor.execute(f"PRAGMA page_size").fetchone()[0]
@@ -179,5 +226,4 @@ class _TableMixin:
179
226
  except sqlite3.Error as err:
180
227
  raise ValueError(f"Failed to get table size: {err}")
181
228
 
182
-
183
229
  # EOF
@@ -9,7 +9,7 @@ THIS_FILE = (
9
9
 
10
10
  import sqlite3
11
11
  import contextlib
12
- from .._BaseMixins._BaseTransactionMixin import _BaseTransactionMixin
12
+ from ..._BaseMixins._BaseTransactionMixin import _BaseTransactionMixin
13
13
 
14
14
 
15
15
  class _TransactionMixin:
@@ -1,11 +1,19 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-07-16 09:46:33 (ywatanabe)"
4
+ # File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_SQLite3Mixins/__init__.py
5
+ # ----------------------------------------
6
+ import os
7
+ __FILE__ = __file__
8
+ __DIR__ = os.path.dirname(__FILE__)
9
+ # ----------------------------------------
3
10
  # Time-stamp: "2024-11-12 09:29:50 (ywatanabe)"
4
- # File: ./scitex_repo/src/scitex/db/_SQLite3Mixins/__init__.py
5
11
 
12
+ from ._ArrayMixin import _ArrayMixin
6
13
  from ._BatchMixin import _BatchMixin
7
14
  from ._BlobMixin import _BlobMixin
8
15
  from ._ConnectionMixin import _ConnectionMixin
16
+ from ._ColumnMixin import _ColumnMixin
9
17
  from ._ImportExportMixin import _ImportExportMixin
10
18
  from ._IndexMixin import _IndexMixin
11
19
  from ._MaintenanceMixin import _MaintenanceMixin
@@ -13,11 +21,14 @@ from ._QueryMixin import _QueryMixin
13
21
  from ._RowMixin import _RowMixin
14
22
  from ._TableMixin import _TableMixin
15
23
  from ._TransactionMixin import _TransactionMixin
24
+ from ._GitMixin import _GitMixin
16
25
 
17
26
  __all__ = [
27
+ "_ArrayMixin",
18
28
  "_BatchMixin",
19
29
  "_BlobMixin",
20
30
  "_ConnectionMixin",
31
+ "_ColumnMixin",
21
32
  "_ImportExportMixin",
22
33
  "_IndexMixin",
23
34
  "_MaintenanceMixin",
@@ -25,6 +36,7 @@ __all__ = [
25
36
  "_RowMixin",
26
37
  "_TableMixin",
27
38
  "_TransactionMixin",
39
+ "_GitMixin",
28
40
  ]
29
41
 
30
- # EOF
42
+ # EOF
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env python3
2
+ """SQLite3 database module."""
3
+
4
+ from ._SQLite3 import SQLite3
5
+ from ._delete_duplicates import delete_sqlite3_duplicates
6
+
7
+ __all__ = ["SQLite3", "delete_sqlite3_duplicates"]
@@ -0,0 +1,274 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-07-16 14:00:04 (ywatanabe)"
4
+ # File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_delete_duplicates.py
5
+ # ----------------------------------------
6
+ import os
7
+ __FILE__ = __file__
8
+ __DIR__ = os.path.dirname(__FILE__)
9
+ # ----------------------------------------
10
+ # Time-stamp: "2024-11-11 14:16:58 (ywatanabe)"
11
+
12
+ import sqlite3
13
+ from typing import List
14
+ from typing import Optional
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ import pandas as pd
19
+
20
+ """
21
+ Functionality:
22
+ - Deletes duplicate entries from an SQLite database table
23
+ Input:
24
+ - SQLite database file path, table name, columns to consider for duplicates
25
+ Output:
26
+ - Updated SQLite database with duplicates removed
27
+ Prerequisites:
28
+ - sqlite3, pandas, tqdm, scitex
29
+ """
30
+
31
+
32
+ def _sort_db(
33
+ cursor: sqlite3.Cursor, table_name: str, columns: List[str]
34
+ ) -> None:
35
+ """
36
+ Sorts the database table based on the specified columns.
37
+
38
+ Parameters
39
+ ----------
40
+ cursor : sqlite3.Cursor
41
+ The cursor object for executing SQL commands.
42
+ table_name : str
43
+ The name of the table to be sorted.
44
+ columns : List[str]
45
+ The list of column names to sort by, in order of priority.
46
+
47
+ Example
48
+ -------
49
+ >>> conn = sqlite3.connect('example.db')
50
+ >>> cursor = conn.cursor()
51
+ >>> _sort_db(cursor, 'my_table', ['column1', 'column2'])
52
+ >>> conn.commit()
53
+ >>> conn.close()
54
+ """
55
+ columns_str = ", ".join(columns)
56
+ temp_table = f"{table_name}_temp"
57
+
58
+ cursor.execute(
59
+ f"CREATE TABLE {temp_table} AS SELECT * FROM {table_name} ORDER BY {columns_str}"
60
+ )
61
+ cursor.execute(f"DROP TABLE {table_name}")
62
+ cursor.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}")
63
+
64
+
65
+ # def _determine_columns(
66
+ # cursor: sqlite3.Cursor,
67
+ # table_name: str,
68
+ # columns: Union[str, List[str]],
69
+ # include_blob: bool,
70
+ # ) -> List[str]:
71
+ # cursor.execute(f"PRAGMA table_info({table_name})")
72
+ # table_info = cursor.fetchall()
73
+ # all_columns = [col[1] for col in table_info]
74
+ # column_types = {col[1]: col[2] for col in table_info}
75
+
76
+ # if columns == "all":
77
+ # columns = (
78
+ # all_columns
79
+ # if include_blob
80
+ # else [
81
+ # col
82
+ # for col in all_columns
83
+ # if column_types[col].lower() != "blob"
84
+ # ]
85
+ # )
86
+ # elif isinstance(columns, str):
87
+ # columns = [columns]
88
+
89
+ # columns_str = ", ".join(columns)
90
+ # print(f"Columns considered for duplicates: {columns_str}")
91
+
92
+ # return columns
93
+
94
+ def _determine_columns(
95
+ cursor: sqlite3.Cursor,
96
+ table_name: str,
97
+ columns: Union[str, List[str]],
98
+ include_blob: bool,
99
+ ) -> List[str]:
100
+ cursor.execute(f"PRAGMA table_info({table_name})")
101
+ table_info = cursor.fetchall()
102
+ all_columns = [col[1] for col in table_info]
103
+ column_types = {col[1]: col[2] for col in table_info}
104
+
105
+ if columns == "all":
106
+ columns = all_columns
107
+ # Exclude blob columns
108
+ if not include_blob:
109
+ columns = [col for col in columns if column_types[col].lower() != "blob"]
110
+ # Exclude timestamp columns
111
+ columns = [col for col in columns if not col.endswith("_at")]
112
+ elif isinstance(columns, str):
113
+ columns = [columns]
114
+
115
+ columns_str = ", ".join(columns)
116
+ print(f"Columns considered for duplicates: {columns_str}")
117
+
118
+ return columns
119
+
120
+ def _fetch_as_df(
121
+ cursor: sqlite3.Cursor, columns: List[str], table_name: str
122
+ ) -> pd.DataFrame:
123
+ print("\nFetching all database entries...")
124
+ columns_str = ", ".join(columns)
125
+ query = f"SELECT {columns_str} FROM {table_name}"
126
+ cursor.execute(query)
127
+ df_entries = cursor.fetchall()
128
+ return pd.DataFrame(df_entries, columns=columns)
129
+
130
+
131
+ def _find_duplicated(df: pd.DataFrame) -> pd.DataFrame:
132
+ df_duplicated = df[df.duplicated(keep="first")].copy()
133
+ duplication_rate = len(df_duplicated) / (len(df) - len(df_duplicated))
134
+ print(
135
+ f"\n{100*duplication_rate:.2f}% of data was duplicated. Cleaning up..."
136
+ )
137
+ print(f"\nOriginal entries:\n{df.head()}")
138
+ print(f"\nDuplicated entries:\n{df_duplicated.head()}")
139
+ return df_duplicated
140
+
141
+
142
def verify_duplicated_index(
    cursor: sqlite3.Cursor,
    duplicated_row: pd.Series,
    table_name: str,
    dry_run: bool,
) -> Tuple[str, bool]:
    """
    Check if entry to delete is the one intended.

    Looks up ``duplicated_row`` in ``table_name`` by matching every field
    of the row against the column of the same name.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        Cursor on the database that contains ``table_name``.
    duplicated_row : pd.Series
        Row to look up; the index holds the column names.
    table_name : str
        Table to search.
    dry_run : bool
        When True, print the expected row, the rows found, and the
        verification outcome.

    Returns
    -------
    Tuple[str, bool]
        The parameterized SELECT statement that was executed, and whether
        at least one matching row exists.
    """
    columns = list(duplicated_row.index)
    columns_str = ", ".join(columns)

    # "IS" behaves like "=" for non-NULL operands but also matches
    # NULL against NULL, so rows containing NULLs can be verified too.
    where_conditions = " AND ".join([f"{col} IS ?" for col in columns])
    select_query = f"""
        SELECT {columns_str}
        FROM {table_name}
        WHERE {where_conditions}
    """
    # tolist() yields plain Python scalars; numpy integer scalars coming
    # out of a numeric Series cannot be bound by sqlite3.
    params = tuple(duplicated_row.tolist())
    cursor.execute(select_query, params)
    entries = cursor.fetchall()

    is_verified = len(entries) >= 1

    if dry_run:
        print(f"Expected duplicate entry: {tuple(duplicated_row)}")
        print(f"Found entries: {entries}")
        print(f"Verification {'succeeded' if is_verified else 'failed'}")

    return select_query, is_verified
169
+
170
+
171
def _delete_entry(
    cursor: sqlite3.Cursor,
    duplicated_row: pd.Series,
    table_name: str,
    dry_run: bool = True,
) -> None:
    """
    Delete the redundant copies of one duplicated row.

    The row is first looked up with ``verify_duplicated_index``. When it
    is found and ``dry_run`` is False, every matching row except the one
    with the smallest ``rowid`` is deleted, so one copy always survives.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        Cursor on the database that contains ``table_name``.
    duplicated_row : pd.Series
        Row to delete; the index holds the column names.
    table_name : str
        Table to delete from. It must be an ordinary rowid table.
    dry_run : bool
        When True (default), only report what would be deleted.

    Notes
    -----
    The caller is responsible for committing.
    """
    _, is_verified = verify_duplicated_index(
        cursor, duplicated_row, table_name, dry_run
    )
    if not is_verified:
        print(
            f"Skipping entry (not found or already deleted):\n{duplicated_row}"
        )
        return

    if dry_run:
        print(f"[DRY RUN] Would delete entry:\n{duplicated_row}")
        return

    # Build the DELETE explicitly. Rewriting the SELECT text into a DELETE
    # yields "DELETE <cols> FROM ...", which is not valid SQL.
    columns = list(duplicated_row.index)
    where_conditions = " AND ".join(f"{col} IS ?" for col in columns)
    params = tuple(duplicated_row.tolist())
    delete_query = f"""
        DELETE FROM {table_name}
        WHERE {where_conditions}
        AND rowid NOT IN (
            SELECT MIN(rowid)
            FROM {table_name}
            WHERE {where_conditions}
        )
    """
    # The WHERE clause appears twice, so the parameters are bound twice.
    cursor.execute(delete_query, params + params)
    print(f"Deleted entry:\n{duplicated_row}")
191
+
192
+
193
def delete_sqlite3_duplicates(
    lpath_db: str,
    table_name: str,
    columns: Union[str, List[str]] = "all",
    include_blob: bool = False,
    chunk_size: int = 10_000,
    dry_run: bool = True,
) -> Tuple[Optional[int], Optional[int]]:
    """
    Remove duplicate rows from a table of an SQLite database file.

    Rows are grouped by ``columns``; within each group the row with the
    smallest ``rowid`` is kept. The surviving rows are copied into
    ``<table_name>_temp``, which then replaces the original table.

    Parameters
    ----------
    lpath_db : str
        Path to the SQLite database file.
    table_name : str
        Table to deduplicate.
    columns : Union[str, List[str]]
        Columns that define a duplicate; ``"all"`` lets
        ``_determine_columns`` pick them from the table schema.
    include_blob : bool
        Passed to ``_determine_columns``.
    chunk_size : int
        Unused; kept so existing callers keep working.
    dry_run : bool
        When True (default), nothing in the database is modified; the
        number of duplicates is only counted and reported.

    Returns
    -------
    Tuple[Optional[int], Optional[int]]
        ``(total_rows, total_duplicates)``, or ``(None, None)`` if any
        error occurred (the error is printed).

    Notes
    -----
    The replacement table is built with ``CREATE TABLE ... AS SELECT``,
    so constraints and indexes of the original table are not preserved.
    """
    # Bound before the try block so that "finally" can tell whether the
    # connection was ever opened.
    conn = None
    try:
        conn = sqlite3.connect(lpath_db)
        cursor = conn.cursor()

        # Vacuum the database to free up space
        if not dry_run:
            cursor.execute("VACUUM")
            conn.commit()

        columns = _determine_columns(cursor, table_name, columns, include_blob)
        if not columns:
            raise ValueError(
                f"No columns available to detect duplicates in table: {table_name}"
            )
        columns_str = ", ".join(columns)

        # Get total row count
        total_rows = cursor.execute(
            f"SELECT COUNT(*) FROM {table_name}"
        ).fetchone()[0]
        print(f"Total rows in table: {total_rows}")

        if dry_run:
            print(f"[DRY RUN] Would execute deduplication based on: {columns_str}")
            # Read-only count of distinct groups. GROUP BY treats NULLs as
            # equal, the same grouping PARTITION BY uses in the real run.
            total_unique = cursor.execute(
                f"SELECT COUNT(*) FROM "
                f"(SELECT 1 FROM {table_name} GROUP BY {columns_str})"
            ).fetchone()[0]
        else:
            # Drop temp table if exists from previous run
            temp_table = f"{table_name}_temp"
            cursor.execute(f"DROP TABLE IF EXISTS {temp_table}")

            # Get all columns for creating temp table with same structure
            cursor.execute(f"PRAGMA table_info({table_name})")
            all_cols = [col[1] for col in cursor.fetchall()]
            all_cols_str = ", ".join(all_cols)

            # Create temp table with same structure
            cursor.execute(
                f"CREATE TABLE {temp_table} AS "
                f"SELECT {all_cols_str} FROM {table_name} LIMIT 0"
            )

            # Insert unique rows based on specified columns
            insert_query = f"""
                INSERT INTO {temp_table}
                SELECT {all_cols_str}
                FROM (
                    SELECT *, ROW_NUMBER() OVER (PARTITION BY {columns_str} ORDER BY rowid) as rn
                    FROM {table_name}
                )
                WHERE rn = 1
            """
            cursor.execute(insert_query)
            conn.commit()

            # Count unique rows
            total_unique = cursor.execute(
                f"SELECT COUNT(*) FROM {temp_table}"
            ).fetchone()[0]

            # Replace original table with deduplicated one
            cursor.execute(f"DROP TABLE {table_name}")
            cursor.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}")
            cursor.execute("VACUUM")
            conn.commit()

        total_duplicates = total_rows - total_unique

        print(f"Total rows processed: {total_rows}")
        print(f"Total unique rows: {total_unique}")
        print(f"Total duplicates removed: {total_duplicates}")

        return total_rows, total_duplicates

    except Exception as error:
        print(f"An error occurred: {error}")
        return None, None

    finally:
        if conn is not None:
            conn.close()
273
+
274
+ # EOF
@@ -4,6 +4,7 @@
4
4
  from ._auto_order import AutoOrderDecorator, batch_fn, disable_auto_order, enable_auto_order, numpy_fn, pandas_fn, torch_fn
5
5
  from ._batch_fn import batch_fn
6
6
  from ._cache_disk import cache_disk
7
+ from ._cache_disk_async import cache_disk_async
7
8
  from ._cache_mem import cache_mem
8
9
  from ._combined import batch_numpy_fn, batch_pandas_fn, batch_torch_fn, numpy_batch_fn, pandas_batch_fn, torch_batch_fn
9
10
  from ._converters import ConversionWarning, is_cuda, is_nested_decorator, is_torch, to_numpy, to_torch
@@ -27,6 +28,7 @@ __all__ = [
27
28
  "batch_pandas_fn",
28
29
  "batch_torch_fn",
29
30
  "cache_disk",
31
+ "cache_disk_async",
30
32
  "cache_mem",
31
33
  "deprecated",
32
34
  "disable_auto_order",