scitex 2.0.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (572) hide show
  1. scitex/__init__.py +73 -0
  2. scitex/__main__.py +89 -0
  3. scitex/__version__.py +14 -0
  4. scitex/_sh.py +59 -0
  5. scitex/ai/_LearningCurveLogger.py +583 -0
  6. scitex/ai/__Classifiers.py +101 -0
  7. scitex/ai/__init__.py +55 -0
  8. scitex/ai/_gen_ai/_Anthropic.py +173 -0
  9. scitex/ai/_gen_ai/_BaseGenAI.py +336 -0
  10. scitex/ai/_gen_ai/_DeepSeek.py +175 -0
  11. scitex/ai/_gen_ai/_Google.py +161 -0
  12. scitex/ai/_gen_ai/_Groq.py +97 -0
  13. scitex/ai/_gen_ai/_Llama.py +142 -0
  14. scitex/ai/_gen_ai/_OpenAI.py +230 -0
  15. scitex/ai/_gen_ai/_PARAMS.py +565 -0
  16. scitex/ai/_gen_ai/_Perplexity.py +191 -0
  17. scitex/ai/_gen_ai/__init__.py +32 -0
  18. scitex/ai/_gen_ai/_calc_cost.py +78 -0
  19. scitex/ai/_gen_ai/_format_output_func.py +183 -0
  20. scitex/ai/_gen_ai/_genai_factory.py +71 -0
  21. scitex/ai/act/__init__.py +8 -0
  22. scitex/ai/act/_define.py +11 -0
  23. scitex/ai/classification/__init__.py +7 -0
  24. scitex/ai/classification/classification_reporter.py +1137 -0
  25. scitex/ai/classification/classifier_server.py +131 -0
  26. scitex/ai/classification/classifiers.py +101 -0
  27. scitex/ai/classification_reporter.py +1161 -0
  28. scitex/ai/classifier_server.py +131 -0
  29. scitex/ai/clustering/__init__.py +11 -0
  30. scitex/ai/clustering/_pca.py +115 -0
  31. scitex/ai/clustering/_umap.py +376 -0
  32. scitex/ai/early_stopping.py +149 -0
  33. scitex/ai/feature_extraction/__init__.py +56 -0
  34. scitex/ai/feature_extraction/vit.py +148 -0
  35. scitex/ai/genai/__init__.py +277 -0
  36. scitex/ai/genai/anthropic.py +177 -0
  37. scitex/ai/genai/anthropic_provider.py +320 -0
  38. scitex/ai/genai/anthropic_refactored.py +109 -0
  39. scitex/ai/genai/auth_manager.py +200 -0
  40. scitex/ai/genai/base_genai.py +336 -0
  41. scitex/ai/genai/base_provider.py +291 -0
  42. scitex/ai/genai/calc_cost.py +78 -0
  43. scitex/ai/genai/chat_history.py +307 -0
  44. scitex/ai/genai/cost_tracker.py +276 -0
  45. scitex/ai/genai/deepseek.py +188 -0
  46. scitex/ai/genai/deepseek_provider.py +251 -0
  47. scitex/ai/genai/format_output_func.py +183 -0
  48. scitex/ai/genai/genai_factory.py +71 -0
  49. scitex/ai/genai/google.py +169 -0
  50. scitex/ai/genai/google_provider.py +228 -0
  51. scitex/ai/genai/groq.py +104 -0
  52. scitex/ai/genai/groq_provider.py +248 -0
  53. scitex/ai/genai/image_processor.py +250 -0
  54. scitex/ai/genai/llama.py +155 -0
  55. scitex/ai/genai/llama_provider.py +214 -0
  56. scitex/ai/genai/mock_provider.py +127 -0
  57. scitex/ai/genai/model_registry.py +304 -0
  58. scitex/ai/genai/openai.py +230 -0
  59. scitex/ai/genai/openai_provider.py +293 -0
  60. scitex/ai/genai/params.py +565 -0
  61. scitex/ai/genai/perplexity.py +202 -0
  62. scitex/ai/genai/perplexity_provider.py +205 -0
  63. scitex/ai/genai/provider_base.py +302 -0
  64. scitex/ai/genai/provider_factory.py +370 -0
  65. scitex/ai/genai/response_handler.py +235 -0
  66. scitex/ai/layer/_Pass.py +21 -0
  67. scitex/ai/layer/__init__.py +10 -0
  68. scitex/ai/layer/_switch.py +8 -0
  69. scitex/ai/loss/_L1L2Losses.py +34 -0
  70. scitex/ai/loss/__init__.py +12 -0
  71. scitex/ai/loss/multi_task_loss.py +47 -0
  72. scitex/ai/metrics/__init__.py +9 -0
  73. scitex/ai/metrics/_bACC.py +51 -0
  74. scitex/ai/metrics/silhoute_score_block.py +496 -0
  75. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  76. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  77. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  78. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  79. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  80. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  81. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  82. scitex/ai/optim/__init__.py +13 -0
  83. scitex/ai/optim/_get_set.py +31 -0
  84. scitex/ai/optim/_optimizers.py +71 -0
  85. scitex/ai/plt/__init__.py +21 -0
  86. scitex/ai/plt/_conf_mat.py +592 -0
  87. scitex/ai/plt/_learning_curve.py +194 -0
  88. scitex/ai/plt/_optuna_study.py +111 -0
  89. scitex/ai/plt/aucs/__init__.py +2 -0
  90. scitex/ai/plt/aucs/example.py +60 -0
  91. scitex/ai/plt/aucs/pre_rec_auc.py +223 -0
  92. scitex/ai/plt/aucs/roc_auc.py +246 -0
  93. scitex/ai/sampling/undersample.py +29 -0
  94. scitex/ai/sk/__init__.py +11 -0
  95. scitex/ai/sk/_clf.py +58 -0
  96. scitex/ai/sk/_to_sktime.py +100 -0
  97. scitex/ai/sklearn/__init__.py +26 -0
  98. scitex/ai/sklearn/clf.py +58 -0
  99. scitex/ai/sklearn/to_sktime.py +100 -0
  100. scitex/ai/training/__init__.py +7 -0
  101. scitex/ai/training/early_stopping.py +150 -0
  102. scitex/ai/training/learning_curve_logger.py +555 -0
  103. scitex/ai/utils/__init__.py +22 -0
  104. scitex/ai/utils/_check_params.py +50 -0
  105. scitex/ai/utils/_default_dataset.py +46 -0
  106. scitex/ai/utils/_format_samples_for_sktime.py +26 -0
  107. scitex/ai/utils/_label_encoder.py +134 -0
  108. scitex/ai/utils/_merge_labels.py +22 -0
  109. scitex/ai/utils/_sliding_window_data_augmentation.py +11 -0
  110. scitex/ai/utils/_under_sample.py +51 -0
  111. scitex/ai/utils/_verify_n_gpus.py +16 -0
  112. scitex/ai/utils/grid_search.py +148 -0
  113. scitex/context/__init__.py +9 -0
  114. scitex/context/_suppress_output.py +38 -0
  115. scitex/db/_BaseMixins/_BaseBackupMixin.py +30 -0
  116. scitex/db/_BaseMixins/_BaseBatchMixin.py +31 -0
  117. scitex/db/_BaseMixins/_BaseBlobMixin.py +81 -0
  118. scitex/db/_BaseMixins/_BaseConnectionMixin.py +43 -0
  119. scitex/db/_BaseMixins/_BaseImportExportMixin.py +39 -0
  120. scitex/db/_BaseMixins/_BaseIndexMixin.py +29 -0
  121. scitex/db/_BaseMixins/_BaseMaintenanceMixin.py +33 -0
  122. scitex/db/_BaseMixins/_BaseQueryMixin.py +52 -0
  123. scitex/db/_BaseMixins/_BaseRowMixin.py +32 -0
  124. scitex/db/_BaseMixins/_BaseSchemaMixin.py +44 -0
  125. scitex/db/_BaseMixins/_BaseTableMixin.py +66 -0
  126. scitex/db/_BaseMixins/_BaseTransactionMixin.py +52 -0
  127. scitex/db/_BaseMixins/__init__.py +30 -0
  128. scitex/db/_PostgreSQL.py +126 -0
  129. scitex/db/_PostgreSQLMixins/_BackupMixin.py +166 -0
  130. scitex/db/_PostgreSQLMixins/_BatchMixin.py +82 -0
  131. scitex/db/_PostgreSQLMixins/_BlobMixin.py +231 -0
  132. scitex/db/_PostgreSQLMixins/_ConnectionMixin.py +92 -0
  133. scitex/db/_PostgreSQLMixins/_ImportExportMixin.py +59 -0
  134. scitex/db/_PostgreSQLMixins/_IndexMixin.py +64 -0
  135. scitex/db/_PostgreSQLMixins/_MaintenanceMixin.py +175 -0
  136. scitex/db/_PostgreSQLMixins/_QueryMixin.py +108 -0
  137. scitex/db/_PostgreSQLMixins/_RowMixin.py +75 -0
  138. scitex/db/_PostgreSQLMixins/_SchemaMixin.py +126 -0
  139. scitex/db/_PostgreSQLMixins/_TableMixin.py +176 -0
  140. scitex/db/_PostgreSQLMixins/_TransactionMixin.py +57 -0
  141. scitex/db/_PostgreSQLMixins/__init__.py +34 -0
  142. scitex/db/_SQLite3.py +2136 -0
  143. scitex/db/_SQLite3Mixins/_BatchMixin.py +243 -0
  144. scitex/db/_SQLite3Mixins/_BlobMixin.py +229 -0
  145. scitex/db/_SQLite3Mixins/_ConnectionMixin.py +108 -0
  146. scitex/db/_SQLite3Mixins/_ImportExportMixin.py +80 -0
  147. scitex/db/_SQLite3Mixins/_IndexMixin.py +32 -0
  148. scitex/db/_SQLite3Mixins/_MaintenanceMixin.py +176 -0
  149. scitex/db/_SQLite3Mixins/_QueryMixin.py +83 -0
  150. scitex/db/_SQLite3Mixins/_RowMixin.py +75 -0
  151. scitex/db/_SQLite3Mixins/_TableMixin.py +183 -0
  152. scitex/db/_SQLite3Mixins/_TransactionMixin.py +71 -0
  153. scitex/db/_SQLite3Mixins/__init__.py +30 -0
  154. scitex/db/__init__.py +14 -0
  155. scitex/db/_delete_duplicates.py +397 -0
  156. scitex/db/_inspect.py +163 -0
  157. scitex/decorators/__init__.py +54 -0
  158. scitex/decorators/_auto_order.py +172 -0
  159. scitex/decorators/_batch_fn.py +127 -0
  160. scitex/decorators/_cache_disk.py +32 -0
  161. scitex/decorators/_cache_mem.py +12 -0
  162. scitex/decorators/_combined.py +98 -0
  163. scitex/decorators/_converters.py +282 -0
  164. scitex/decorators/_deprecated.py +26 -0
  165. scitex/decorators/_not_implemented.py +30 -0
  166. scitex/decorators/_numpy_fn.py +86 -0
  167. scitex/decorators/_pandas_fn.py +121 -0
  168. scitex/decorators/_preserve_doc.py +19 -0
  169. scitex/decorators/_signal_fn.py +95 -0
  170. scitex/decorators/_timeout.py +55 -0
  171. scitex/decorators/_torch_fn.py +136 -0
  172. scitex/decorators/_wrap.py +39 -0
  173. scitex/decorators/_xarray_fn.py +88 -0
  174. scitex/dev/__init__.py +15 -0
  175. scitex/dev/_analyze_code_flow.py +284 -0
  176. scitex/dev/_reload.py +59 -0
  177. scitex/dict/_DotDict.py +442 -0
  178. scitex/dict/__init__.py +18 -0
  179. scitex/dict/_listed_dict.py +42 -0
  180. scitex/dict/_pop_keys.py +36 -0
  181. scitex/dict/_replace.py +13 -0
  182. scitex/dict/_safe_merge.py +62 -0
  183. scitex/dict/_to_str.py +32 -0
  184. scitex/dsp/__init__.py +72 -0
  185. scitex/dsp/_crop.py +122 -0
  186. scitex/dsp/_demo_sig.py +331 -0
  187. scitex/dsp/_detect_ripples.py +212 -0
  188. scitex/dsp/_ensure_3d.py +18 -0
  189. scitex/dsp/_hilbert.py +78 -0
  190. scitex/dsp/_listen.py +702 -0
  191. scitex/dsp/_misc.py +30 -0
  192. scitex/dsp/_mne.py +32 -0
  193. scitex/dsp/_modulation_index.py +79 -0
  194. scitex/dsp/_pac.py +319 -0
  195. scitex/dsp/_psd.py +102 -0
  196. scitex/dsp/_resample.py +65 -0
  197. scitex/dsp/_time.py +36 -0
  198. scitex/dsp/_transform.py +68 -0
  199. scitex/dsp/_wavelet.py +212 -0
  200. scitex/dsp/add_noise.py +111 -0
  201. scitex/dsp/example.py +253 -0
  202. scitex/dsp/filt.py +155 -0
  203. scitex/dsp/norm.py +18 -0
  204. scitex/dsp/params.py +51 -0
  205. scitex/dsp/reference.py +43 -0
  206. scitex/dsp/template.py +25 -0
  207. scitex/dsp/utils/__init__.py +15 -0
  208. scitex/dsp/utils/_differential_bandpass_filters.py +120 -0
  209. scitex/dsp/utils/_ensure_3d.py +18 -0
  210. scitex/dsp/utils/_ensure_even_len.py +10 -0
  211. scitex/dsp/utils/_zero_pad.py +48 -0
  212. scitex/dsp/utils/filter.py +408 -0
  213. scitex/dsp/utils/pac.py +177 -0
  214. scitex/dt/__init__.py +8 -0
  215. scitex/dt/_linspace.py +130 -0
  216. scitex/etc/__init__.py +15 -0
  217. scitex/etc/wait_key.py +34 -0
  218. scitex/gen/_DimHandler.py +196 -0
  219. scitex/gen/_TimeStamper.py +244 -0
  220. scitex/gen/__init__.py +95 -0
  221. scitex/gen/_alternate_kwarg.py +13 -0
  222. scitex/gen/_cache.py +11 -0
  223. scitex/gen/_check_host.py +34 -0
  224. scitex/gen/_ci.py +12 -0
  225. scitex/gen/_close.py +222 -0
  226. scitex/gen/_embed.py +78 -0
  227. scitex/gen/_inspect_module.py +257 -0
  228. scitex/gen/_is_ipython.py +12 -0
  229. scitex/gen/_less.py +48 -0
  230. scitex/gen/_list_packages.py +139 -0
  231. scitex/gen/_mat2py.py +88 -0
  232. scitex/gen/_norm.py +170 -0
  233. scitex/gen/_paste.py +18 -0
  234. scitex/gen/_print_config.py +84 -0
  235. scitex/gen/_shell.py +48 -0
  236. scitex/gen/_src.py +111 -0
  237. scitex/gen/_start.py +451 -0
  238. scitex/gen/_symlink.py +55 -0
  239. scitex/gen/_symlog.py +27 -0
  240. scitex/gen/_tee.py +238 -0
  241. scitex/gen/_title2path.py +60 -0
  242. scitex/gen/_title_case.py +88 -0
  243. scitex/gen/_to_even.py +84 -0
  244. scitex/gen/_to_odd.py +34 -0
  245. scitex/gen/_to_rank.py +39 -0
  246. scitex/gen/_transpose.py +37 -0
  247. scitex/gen/_type.py +78 -0
  248. scitex/gen/_var_info.py +73 -0
  249. scitex/gen/_wrap.py +17 -0
  250. scitex/gen/_xml2dict.py +76 -0
  251. scitex/gen/misc.py +730 -0
  252. scitex/gen/path.py +0 -0
  253. scitex/general/__init__.py +5 -0
  254. scitex/gists/_SigMacro_processFigure_S.py +128 -0
  255. scitex/gists/_SigMacro_toBlue.py +172 -0
  256. scitex/gists/__init__.py +12 -0
  257. scitex/io/_H5Explorer.py +292 -0
  258. scitex/io/__init__.py +82 -0
  259. scitex/io/_cache.py +101 -0
  260. scitex/io/_flush.py +24 -0
  261. scitex/io/_glob.py +103 -0
  262. scitex/io/_json2md.py +113 -0
  263. scitex/io/_load.py +168 -0
  264. scitex/io/_load_configs.py +146 -0
  265. scitex/io/_load_modules/__init__.py +38 -0
  266. scitex/io/_load_modules/_catboost.py +66 -0
  267. scitex/io/_load_modules/_con.py +20 -0
  268. scitex/io/_load_modules/_db.py +24 -0
  269. scitex/io/_load_modules/_docx.py +42 -0
  270. scitex/io/_load_modules/_eeg.py +110 -0
  271. scitex/io/_load_modules/_hdf5.py +196 -0
  272. scitex/io/_load_modules/_image.py +19 -0
  273. scitex/io/_load_modules/_joblib.py +19 -0
  274. scitex/io/_load_modules/_json.py +18 -0
  275. scitex/io/_load_modules/_markdown.py +103 -0
  276. scitex/io/_load_modules/_matlab.py +37 -0
  277. scitex/io/_load_modules/_numpy.py +39 -0
  278. scitex/io/_load_modules/_optuna.py +155 -0
  279. scitex/io/_load_modules/_pandas.py +69 -0
  280. scitex/io/_load_modules/_pdf.py +31 -0
  281. scitex/io/_load_modules/_pickle.py +24 -0
  282. scitex/io/_load_modules/_torch.py +16 -0
  283. scitex/io/_load_modules/_txt.py +126 -0
  284. scitex/io/_load_modules/_xml.py +49 -0
  285. scitex/io/_load_modules/_yaml.py +23 -0
  286. scitex/io/_mv_to_tmp.py +19 -0
  287. scitex/io/_path.py +286 -0
  288. scitex/io/_reload.py +78 -0
  289. scitex/io/_save.py +539 -0
  290. scitex/io/_save_modules/__init__.py +66 -0
  291. scitex/io/_save_modules/_catboost.py +22 -0
  292. scitex/io/_save_modules/_csv.py +89 -0
  293. scitex/io/_save_modules/_excel.py +49 -0
  294. scitex/io/_save_modules/_hdf5.py +249 -0
  295. scitex/io/_save_modules/_html.py +48 -0
  296. scitex/io/_save_modules/_image.py +140 -0
  297. scitex/io/_save_modules/_joblib.py +25 -0
  298. scitex/io/_save_modules/_json.py +25 -0
  299. scitex/io/_save_modules/_listed_dfs_as_csv.py +57 -0
  300. scitex/io/_save_modules/_listed_scalars_as_csv.py +42 -0
  301. scitex/io/_save_modules/_matlab.py +24 -0
  302. scitex/io/_save_modules/_mp4.py +29 -0
  303. scitex/io/_save_modules/_numpy.py +57 -0
  304. scitex/io/_save_modules/_optuna_study_as_csv_and_pngs.py +38 -0
  305. scitex/io/_save_modules/_pickle.py +45 -0
  306. scitex/io/_save_modules/_plotly.py +27 -0
  307. scitex/io/_save_modules/_text.py +23 -0
  308. scitex/io/_save_modules/_torch.py +26 -0
  309. scitex/io/_save_modules/_yaml.py +29 -0
  310. scitex/life/__init__.py +10 -0
  311. scitex/life/_monitor_rain.py +49 -0
  312. scitex/linalg/__init__.py +17 -0
  313. scitex/linalg/_distance.py +63 -0
  314. scitex/linalg/_geometric_median.py +64 -0
  315. scitex/linalg/_misc.py +73 -0
  316. scitex/nn/_AxiswiseDropout.py +27 -0
  317. scitex/nn/_BNet.py +126 -0
  318. scitex/nn/_BNet_Res.py +164 -0
  319. scitex/nn/_ChannelGainChanger.py +44 -0
  320. scitex/nn/_DropoutChannels.py +50 -0
  321. scitex/nn/_Filters.py +489 -0
  322. scitex/nn/_FreqGainChanger.py +110 -0
  323. scitex/nn/_GaussianFilter.py +48 -0
  324. scitex/nn/_Hilbert.py +111 -0
  325. scitex/nn/_MNet_1000.py +157 -0
  326. scitex/nn/_ModulationIndex.py +221 -0
  327. scitex/nn/_PAC.py +414 -0
  328. scitex/nn/_PSD.py +40 -0
  329. scitex/nn/_ResNet1D.py +120 -0
  330. scitex/nn/_SpatialAttention.py +25 -0
  331. scitex/nn/_Spectrogram.py +161 -0
  332. scitex/nn/_SwapChannels.py +50 -0
  333. scitex/nn/_TransposeLayer.py +19 -0
  334. scitex/nn/_Wavelet.py +183 -0
  335. scitex/nn/__init__.py +63 -0
  336. scitex/os/__init__.py +8 -0
  337. scitex/os/_mv.py +50 -0
  338. scitex/parallel/__init__.py +8 -0
  339. scitex/parallel/_run.py +151 -0
  340. scitex/path/__init__.py +33 -0
  341. scitex/path/_clean.py +52 -0
  342. scitex/path/_find.py +108 -0
  343. scitex/path/_get_module_path.py +51 -0
  344. scitex/path/_get_spath.py +35 -0
  345. scitex/path/_getsize.py +18 -0
  346. scitex/path/_increment_version.py +87 -0
  347. scitex/path/_mk_spath.py +51 -0
  348. scitex/path/_path.py +19 -0
  349. scitex/path/_split.py +23 -0
  350. scitex/path/_this_path.py +19 -0
  351. scitex/path/_version.py +101 -0
  352. scitex/pd/__init__.py +41 -0
  353. scitex/pd/_find_indi.py +126 -0
  354. scitex/pd/_find_pval.py +113 -0
  355. scitex/pd/_force_df.py +154 -0
  356. scitex/pd/_from_xyz.py +71 -0
  357. scitex/pd/_ignore_SettingWithCopyWarning.py +34 -0
  358. scitex/pd/_melt_cols.py +81 -0
  359. scitex/pd/_merge_columns.py +221 -0
  360. scitex/pd/_mv.py +63 -0
  361. scitex/pd/_replace.py +62 -0
  362. scitex/pd/_round.py +93 -0
  363. scitex/pd/_slice.py +63 -0
  364. scitex/pd/_sort.py +91 -0
  365. scitex/pd/_to_numeric.py +53 -0
  366. scitex/pd/_to_xy.py +59 -0
  367. scitex/pd/_to_xyz.py +110 -0
  368. scitex/plt/__init__.py +36 -0
  369. scitex/plt/_subplots/_AxesWrapper.py +182 -0
  370. scitex/plt/_subplots/_AxisWrapper.py +249 -0
  371. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +414 -0
  372. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +896 -0
  373. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +368 -0
  374. scitex/plt/_subplots/_AxisWrapperMixins/_TrackingMixin.py +185 -0
  375. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +16 -0
  376. scitex/plt/_subplots/_FigWrapper.py +226 -0
  377. scitex/plt/_subplots/_SubplotsWrapper.py +171 -0
  378. scitex/plt/_subplots/__init__.py +111 -0
  379. scitex/plt/_subplots/_export_as_csv.py +232 -0
  380. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +61 -0
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +90 -0
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +49 -0
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +46 -0
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +39 -0
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +125 -0
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +72 -0
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +34 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +36 -0
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +79 -0
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +59 -0
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +32 -0
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +79 -0
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +75 -0
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +64 -0
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +44 -0
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +70 -0
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +66 -0
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +95 -0
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +67 -0
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +52 -0
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +46 -0
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +46 -0
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +46 -0
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +46 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +44 -0
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +103 -0
  407. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +82 -0
  408. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +58 -0
  409. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +117 -0
  410. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +30 -0
  411. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +51 -0
  412. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +93 -0
  413. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +94 -0
  414. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +92 -0
  415. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +65 -0
  416. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +59 -0
  417. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +58 -0
  418. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +45 -0
  419. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +70 -0
  420. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +75 -0
  421. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +75 -0
  422. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +155 -0
  423. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +64 -0
  424. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +77 -0
  425. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +210 -0
  426. scitex/plt/_subplots/_export_as_csv_formatters/verify_formatters.py +342 -0
  427. scitex/plt/_subplots/_export_as_csv_formatters.py +115 -0
  428. scitex/plt/_tpl.py +28 -0
  429. scitex/plt/ax/__init__.py +114 -0
  430. scitex/plt/ax/_plot/__init__.py +53 -0
  431. scitex/plt/ax/_plot/_plot_circular_hist.py +124 -0
  432. scitex/plt/ax/_plot/_plot_conf_mat.py +136 -0
  433. scitex/plt/ax/_plot/_plot_cube.py +57 -0
  434. scitex/plt/ax/_plot/_plot_ecdf.py +84 -0
  435. scitex/plt/ax/_plot/_plot_fillv.py +55 -0
  436. scitex/plt/ax/_plot/_plot_heatmap.py +266 -0
  437. scitex/plt/ax/_plot/_plot_image.py +94 -0
  438. scitex/plt/ax/_plot/_plot_joyplot.py +76 -0
  439. scitex/plt/ax/_plot/_plot_raster.py +172 -0
  440. scitex/plt/ax/_plot/_plot_rectangle.py +69 -0
  441. scitex/plt/ax/_plot/_plot_scatter_hist.py +133 -0
  442. scitex/plt/ax/_plot/_plot_shaded_line.py +142 -0
  443. scitex/plt/ax/_plot/_plot_statistical_shaded_line.py +221 -0
  444. scitex/plt/ax/_plot/_plot_violin.py +343 -0
  445. scitex/plt/ax/_style/__init__.py +38 -0
  446. scitex/plt/ax/_style/_add_marginal_ax.py +44 -0
  447. scitex/plt/ax/_style/_add_panel.py +92 -0
  448. scitex/plt/ax/_style/_extend.py +64 -0
  449. scitex/plt/ax/_style/_force_aspect.py +37 -0
  450. scitex/plt/ax/_style/_format_label.py +23 -0
  451. scitex/plt/ax/_style/_hide_spines.py +84 -0
  452. scitex/plt/ax/_style/_map_ticks.py +182 -0
  453. scitex/plt/ax/_style/_rotate_labels.py +215 -0
  454. scitex/plt/ax/_style/_sci_note.py +279 -0
  455. scitex/plt/ax/_style/_set_log_scale.py +299 -0
  456. scitex/plt/ax/_style/_set_meta.py +261 -0
  457. scitex/plt/ax/_style/_set_n_ticks.py +37 -0
  458. scitex/plt/ax/_style/_set_size.py +16 -0
  459. scitex/plt/ax/_style/_set_supxyt.py +116 -0
  460. scitex/plt/ax/_style/_set_ticks.py +276 -0
  461. scitex/plt/ax/_style/_set_xyt.py +121 -0
  462. scitex/plt/ax/_style/_share_axes.py +264 -0
  463. scitex/plt/ax/_style/_shift.py +139 -0
  464. scitex/plt/ax/_style/_show_spines.py +333 -0
  465. scitex/plt/color/_PARAMS.py +70 -0
  466. scitex/plt/color/__init__.py +52 -0
  467. scitex/plt/color/_add_hue_col.py +41 -0
  468. scitex/plt/color/_colors.py +205 -0
  469. scitex/plt/color/_get_colors_from_cmap.py +134 -0
  470. scitex/plt/color/_interpolate.py +29 -0
  471. scitex/plt/color/_vizualize_colors.py +54 -0
  472. scitex/plt/utils/__init__.py +44 -0
  473. scitex/plt/utils/_calc_bacc_from_conf_mat.py +46 -0
  474. scitex/plt/utils/_calc_nice_ticks.py +101 -0
  475. scitex/plt/utils/_close.py +68 -0
  476. scitex/plt/utils/_colorbar.py +96 -0
  477. scitex/plt/utils/_configure_mpl.py +295 -0
  478. scitex/plt/utils/_histogram_utils.py +132 -0
  479. scitex/plt/utils/_im2grid.py +70 -0
  480. scitex/plt/utils/_is_valid_axis.py +78 -0
  481. scitex/plt/utils/_mk_colorbar.py +65 -0
  482. scitex/plt/utils/_mk_patches.py +26 -0
  483. scitex/plt/utils/_scientific_captions.py +638 -0
  484. scitex/plt/utils/_scitex_config.py +223 -0
  485. scitex/reproduce/__init__.py +14 -0
  486. scitex/reproduce/_fix_seeds.py +45 -0
  487. scitex/reproduce/_gen_ID.py +55 -0
  488. scitex/reproduce/_gen_timestamp.py +35 -0
  489. scitex/res/__init__.py +5 -0
  490. scitex/resource/__init__.py +13 -0
  491. scitex/resource/_get_processor_usages.py +281 -0
  492. scitex/resource/_get_specs.py +280 -0
  493. scitex/resource/_log_processor_usages.py +190 -0
  494. scitex/resource/_utils/__init__.py +31 -0
  495. scitex/resource/_utils/_get_env_info.py +481 -0
  496. scitex/resource/limit_ram.py +33 -0
  497. scitex/scholar/__init__.py +24 -0
  498. scitex/scholar/_local_search.py +454 -0
  499. scitex/scholar/_paper.py +244 -0
  500. scitex/scholar/_pdf_downloader.py +325 -0
  501. scitex/scholar/_search.py +393 -0
  502. scitex/scholar/_vector_search.py +370 -0
  503. scitex/scholar/_web_sources.py +457 -0
  504. scitex/stats/__init__.py +31 -0
  505. scitex/stats/_calc_partial_corr.py +17 -0
  506. scitex/stats/_corr_test_multi.py +94 -0
  507. scitex/stats/_corr_test_wrapper.py +115 -0
  508. scitex/stats/_describe_wrapper.py +90 -0
  509. scitex/stats/_multiple_corrections.py +63 -0
  510. scitex/stats/_nan_stats.py +93 -0
  511. scitex/stats/_p2stars.py +116 -0
  512. scitex/stats/_p2stars_wrapper.py +56 -0
  513. scitex/stats/_statistical_tests.py +73 -0
  514. scitex/stats/desc/__init__.py +40 -0
  515. scitex/stats/desc/_describe.py +189 -0
  516. scitex/stats/desc/_nan.py +289 -0
  517. scitex/stats/desc/_real.py +94 -0
  518. scitex/stats/multiple/__init__.py +14 -0
  519. scitex/stats/multiple/_bonferroni_correction.py +72 -0
  520. scitex/stats/multiple/_fdr_correction.py +400 -0
  521. scitex/stats/multiple/_multicompair.py +28 -0
  522. scitex/stats/tests/__corr_test.py +277 -0
  523. scitex/stats/tests/__corr_test_multi.py +343 -0
  524. scitex/stats/tests/__corr_test_single.py +277 -0
  525. scitex/stats/tests/__init__.py +22 -0
  526. scitex/stats/tests/_brunner_munzel_test.py +192 -0
  527. scitex/stats/tests/_nocorrelation_test.py +28 -0
  528. scitex/stats/tests/_smirnov_grubbs.py +98 -0
  529. scitex/str/__init__.py +113 -0
  530. scitex/str/_clean_path.py +75 -0
  531. scitex/str/_color_text.py +52 -0
  532. scitex/str/_decapitalize.py +58 -0
  533. scitex/str/_factor_out_digits.py +281 -0
  534. scitex/str/_format_plot_text.py +498 -0
  535. scitex/str/_grep.py +48 -0
  536. scitex/str/_latex.py +155 -0
  537. scitex/str/_latex_fallback.py +471 -0
  538. scitex/str/_mask_api.py +39 -0
  539. scitex/str/_mask_api_key.py +8 -0
  540. scitex/str/_parse.py +158 -0
  541. scitex/str/_print_block.py +47 -0
  542. scitex/str/_print_debug.py +68 -0
  543. scitex/str/_printc.py +62 -0
  544. scitex/str/_readable_bytes.py +38 -0
  545. scitex/str/_remove_ansi.py +23 -0
  546. scitex/str/_replace.py +134 -0
  547. scitex/str/_search.py +125 -0
  548. scitex/str/_squeeze_space.py +36 -0
  549. scitex/tex/__init__.py +10 -0
  550. scitex/tex/_preview.py +103 -0
  551. scitex/tex/_to_vec.py +116 -0
  552. scitex/torch/__init__.py +18 -0
  553. scitex/torch/_apply_to.py +34 -0
  554. scitex/torch/_nan_funcs.py +77 -0
  555. scitex/types/_ArrayLike.py +44 -0
  556. scitex/types/_ColorLike.py +21 -0
  557. scitex/types/__init__.py +14 -0
  558. scitex/types/_is_listed_X.py +70 -0
  559. scitex/utils/__init__.py +22 -0
  560. scitex/utils/_compress_hdf5.py +116 -0
  561. scitex/utils/_email.py +120 -0
  562. scitex/utils/_grid.py +148 -0
  563. scitex/utils/_notify.py +247 -0
  564. scitex/utils/_search.py +121 -0
  565. scitex/web/__init__.py +38 -0
  566. scitex/web/_search_pubmed.py +438 -0
  567. scitex/web/_summarize_url.py +158 -0
  568. scitex-2.0.0.dist-info/METADATA +307 -0
  569. scitex-2.0.0.dist-info/RECORD +572 -0
  570. scitex-2.0.0.dist-info/WHEEL +6 -0
  571. scitex-2.0.0.dist-info/licenses/LICENSE +7 -0
  572. scitex-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,397 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Time-stamp: "2024-11-11 14:16:58 (ywatanabe)"
4
+ # File: ./scitex_repo/src/scitex/db/_delete_duplicates.py
5
+
6
+ import sqlite3
7
+ from typing import List, Optional, Tuple, Union
8
+ import pandas as pd
9
+
10
+ #!/usr/bin/env python3
11
+ # -*- coding: utf-8 -*-
12
+ # Time-stamp: "2024-10-20 02:17:10 (ywatanabe)"
13
+ # /data/gpfs/projects/punim2354/ywatanabe/scitex_repo/src/scitex/db/_delete_duplicates_clean.py
14
+
15
+
16
+ """
17
+ Functionality:
18
+ - Deletes duplicate entries from an SQLite database table
19
+ Input:
20
+ - SQLite database file path, table name, columns to consider for duplicates
21
+ Output:
22
+ - Updated SQLite database with duplicates removed
23
+ Prerequisites:
24
+ - sqlite3, pandas, tqdm, scitex
25
+ """
26
+
27
+
28
+ def _sort_db(cursor: sqlite3.Cursor, table_name: str, columns: List[str]) -> None:
29
+ """
30
+ Sorts the database table based on the specified columns.
31
+
32
+ Parameters
33
+ ----------
34
+ cursor : sqlite3.Cursor
35
+ The cursor object for executing SQL commands.
36
+ table_name : str
37
+ The name of the table to be sorted.
38
+ columns : List[str]
39
+ The list of column names to sort by, in order of priority.
40
+
41
+ Example
42
+ -------
43
+ >>> conn = sqlite3.connect('example.db')
44
+ >>> cursor = conn.cursor()
45
+ >>> _sort_db(cursor, 'my_table', ['column1', 'column2'])
46
+ >>> conn.commit()
47
+ >>> conn.close()
48
+ """
49
+ columns_str = ", ".join(columns)
50
+ temp_table = f"{table_name}_temp"
51
+
52
+ cursor.execute(
53
+ f"CREATE TABLE {temp_table} AS SELECT * FROM {table_name} ORDER BY {columns_str}"
54
+ )
55
+ cursor.execute(f"DROP TABLE {table_name}")
56
+ cursor.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}")
57
+
58
+
59
+ def _determine_columns(
60
+ cursor: sqlite3.Cursor,
61
+ table_name: str,
62
+ columns: Union[str, List[str]],
63
+ include_blob: bool,
64
+ ) -> List[str]:
65
+ cursor.execute(f"PRAGMA table_info({table_name})")
66
+ table_info = cursor.fetchall()
67
+ all_columns = [col[1] for col in table_info]
68
+ column_types = {col[1]: col[2] for col in table_info}
69
+
70
+ if columns == "all":
71
+ columns = (
72
+ all_columns
73
+ if include_blob
74
+ else [col for col in all_columns if column_types[col].lower() != "blob"]
75
+ )
76
+ elif isinstance(columns, str):
77
+ columns = [columns]
78
+
79
+ columns_str = ", ".join(columns)
80
+ print(f"Columns considered for duplicates: {columns_str}")
81
+
82
+ return columns
83
+
84
+
85
+ def _fetch_as_df(
86
+ cursor: sqlite3.Cursor, columns: List[str], table_name: str
87
+ ) -> pd.DataFrame:
88
+ print("\nFetching all database entries...")
89
+ columns_str = ", ".join(columns)
90
+ query = f"SELECT {columns_str} FROM {table_name}"
91
+ cursor.execute(query)
92
+ df_entries = cursor.fetchall()
93
+ return pd.DataFrame(df_entries, columns=columns)
94
+
95
+
96
+ def _find_duplicated(df: pd.DataFrame) -> pd.DataFrame:
97
+ df_duplicated = df[df.duplicated(keep="first")].copy()
98
+ duplication_rate = len(df_duplicated) / (len(df) - len(df_duplicated))
99
+ print(f"\n{100*duplication_rate:.2f}% of data was duplicated. Cleaning up...")
100
+ print(f"\nOriginal entries:\n{df.head()}")
101
+ print(f"\nDuplicated entries:\n{df_duplicated.head()}")
102
+ return df_duplicated
103
+
104
+
105
+ def verify_duplicated_index(
106
+ cursor: sqlite3.Cursor, duplicated_row: pd.Series, table_name: str, dry_run: bool
107
+ ) -> Tuple[str, bool]:
108
+ """Check if entry to delete is the one intended"""
109
+ columns = list(duplicated_row.index)
110
+ columns_str = ", ".join(columns)
111
+
112
+ where_conditions = " AND ".join([f"{col} = ?" for col in columns])
113
+ select_query = f"""
114
+ SELECT {columns_str}
115
+ FROM {table_name}
116
+ WHERE {where_conditions}
117
+ """
118
+ cursor.execute(select_query, tuple(duplicated_row))
119
+ entries = cursor.fetchall()
120
+
121
+ is_verified = len(entries) >= 1
122
+
123
+ if dry_run:
124
+ print(f"Expected duplicate entry: {tuple(duplicated_row)}")
125
+ print(f"Found entries: {entries}")
126
+ print(f"Verification {'succeeded' if is_verified else 'failed'}")
127
+
128
+ return select_query, is_verified
129
+
130
+
131
+ def _delete_entry(
132
+ cursor: sqlite3.Cursor,
133
+ duplicated_row: pd.Series,
134
+ table_name: str,
135
+ dry_run: bool = True,
136
+ ) -> None:
137
+ select_query, is_verified = verify_duplicated_index(
138
+ cursor, duplicated_row, table_name, dry_run
139
+ )
140
+ if is_verified:
141
+ delete_query = select_query.replace("SELECT", "DELETE")
142
+ if dry_run:
143
+ print(f"[DRY RUN] Would delete entry:\n{duplicated_row}")
144
+ else:
145
+ cursor.execute(delete_query, tuple(duplicated_row))
146
+ print(f"Deleted entry:\n{duplicated_row}")
147
+ else:
148
+ print(f"Skipping entry (not found or already deleted):\n{duplicated_row}")
149
+
150
+
151
+ # def delete_duplicates(
152
+ # lpath_db: str,
153
+ # table_name: str,
154
+ # columns: Union[str, List[str]] = "all",
155
+ # include_blob: bool = False,
156
+ # batch_size: int = 1000,
157
+ # reindex: bool = False,
158
+ # sort: bool = False,
159
+ # dry_run: bool = True,
160
+ # ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
161
+ # """
162
+ # Delete duplicate entries from an SQLite database table.
163
+
164
+ # Parameters
165
+ # ----------
166
+ # lpath_db : str
167
+ # Path to the SQLite database file.
168
+ # table_name : str
169
+ # Name of the table to remove duplicates from.
170
+ # columns : Union[str, List[str]], optional
171
+ # Columns to consider when identifying duplicates. Default is "all".
172
+ # include_blob : bool, optional
173
+ # Whether to include BLOB columns when considering duplicates. Default is False.
174
+ # batch_size : int, optional
175
+ # Number of rows to process in each batch. Default is 1000.
176
+ # reindex : bool, optional
177
+ # Whether to reindex the table after deletion. Default is False.
178
+ # dry_run : bool, optional
179
+ # If True, simulates the deletion without actually modifying the database. Default is True.
180
+
181
+ # Returns
182
+ # -------
183
+ # Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]
184
+ # A tuple containing:
185
+ # - DataFrame of all entries after deletion process
186
+ # - DataFrame of remaining duplicates if any, None otherwise
187
+ # """
188
+ # try:
189
+ # conn = sqlite3.connect(lpath_db)
190
+ # cursor = conn.cursor()
191
+
192
+ # columns = _determine_columns(cursor, table_name, columns, include_blob)
193
+
194
+ # if sort:
195
+ # _sort_db(cursor, table_name, columns)
196
+
197
+ # df_orig = _fetch_as_df(cursor, columns, table_name)
198
+ # duplicates = _find_duplicated(df_orig)
199
+
200
+ # if duplicates.empty:
201
+ # print("Congratulations. Database is clean.")
202
+ # return df_orig, None
203
+
204
+ # columns_str = ", ".join(columns)
205
+ # where_conditions = " AND ".join([f"{col} = ?" for col in columns])
206
+ # delete_query = f"""
207
+ # DELETE FROM {table_name}
208
+ # WHERE {where_conditions}
209
+ # """
210
+
211
+ # for start in tqdm(range(0, len(duplicates), batch_size)):
212
+ # batch = duplicates.iloc[start:start+batch_size]
213
+ # batch_values = batch.values.tolist()
214
+
215
+ # if dry_run:
216
+ # print(f"[DRY RUN] Would delete {len(batch)} entries")
217
+ # else:
218
+ # cursor.executemany(delete_query, batch_values)
219
+ # conn.commit()
220
+
221
+ # if not dry_run:
222
+ # conn.commit()
223
+
224
+ # if reindex:
225
+ # print("Reindexing the table...")
226
+ # cursor.execute(f"REINDEX {table_name}")
227
+ # conn.commit()
228
+
229
+ # df_after = _fetch_as_df(cursor, columns, table_name)
230
+ # remaining_duplicates = _find_duplicated(df_after)
231
+
232
+ # if remaining_duplicates.empty:
233
+ # print("All duplicates successfully removed.")
234
+ # return df_after, None
235
+ # else:
236
+ # print(f"Warning: {len(remaining_duplicates)} duplicates still remain.\n{remaining_duplicates}")
237
+ # return df_after, remaining_duplicates
238
+
239
+ # except Exception as error:
240
+ # print(f"An error occurred: {error}")
241
+ # return None, None
242
+
243
+ # finally:
244
+ # conn.close()
245
+
246
+ # def delete_duplicates(
247
+ # lpath_db: str,
248
+ # table_name: str,
249
+ # columns: Union[str, List[str]] = "all",
250
+ # include_blob: bool = False,
251
+ # batch_size: int = 1000,
252
+ # chunk_size: int = 100_000,
253
+ # reindex: bool = False,
254
+ # sort: bool = False,
255
+ # dry_run: bool = True,
256
+ # ) -> Tuple[Optional[int], Optional[int]]:
257
+ # try:
258
+ # conn = sqlite3.connect(lpath_db)
259
+ # cursor = conn.cursor()
260
+
261
+ # columns = _determine_columns(cursor, table_name, columns, include_blob)
262
+
263
+ # if sort:
264
+ # _sort_db(cursor, table_name, columns)
265
+
266
+ # columns_str = ", ".join(columns)
267
+ # where_conditions = " AND ".join([f"{col} = ?" for col in columns])
268
+ # delete_query = f"""
269
+ # DELETE FROM {table_name}
270
+ # WHERE {where_conditions}
271
+ # """
272
+
273
+ # total_rows = cursor.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
274
+ # total_deleted = 0
275
+ # total_duplicates = 0
276
+
277
+ # for offset in tqdm(range(0, total_rows, chunk_size)):
278
+ # chunk_query = f"""
279
+ # SELECT {columns_str}
280
+ # FROM {table_name}
281
+ # LIMIT {chunk_size} OFFSET {offset}
282
+ # """
283
+ # df_chunk = pd.read_sql_query(chunk_query, conn)
284
+ # duplicates = _find_duplicated(df_chunk)
285
+ # total_duplicates += len(duplicates)
286
+
287
+ # if duplicates.empty:
288
+ # continue
289
+
290
+ # for start in range(0, len(duplicates), batch_size):
291
+ # batch = duplicates.iloc[start:start+batch_size]
292
+ # batch_values = batch.values.tolist()
293
+
294
+ # if dry_run:
295
+ # print(f"[DRY RUN] Would delete {len(batch)} entries")
296
+ # else:
297
+ # cursor.executemany(delete_query, batch_values)
298
+ # conn.commit()
299
+ # total_deleted += len(batch)
300
+
301
+ # if not dry_run:
302
+ # if reindex:
303
+ # print("Reindexing the table...")
304
+ # cursor.execute(f"REINDEX {table_name}")
305
+ # conn.commit()
306
+
307
+ # print(f"Total duplicates found: {total_duplicates}")
308
+ # print(f"Total entries deleted: {total_deleted}")
309
+
310
+ # return total_duplicates, total_deleted
311
+
312
+ # except Exception as error:
313
+ # print(f"An error occurred: {error}")
314
+ # return None, None
315
+
316
+ # finally:
317
+ # conn.close()
318
+
319
+
320
def delete_duplicates(
    lpath_db: str,
    table_name: str,
    columns: Union[str, List[str]] = "all",
    include_blob: bool = False,
    chunk_size: int = 10_000,
    dry_run: bool = True,
) -> Tuple[Optional[int], Optional[int]]:
    """
    Remove duplicate rows from an SQLite table.

    The distinct rows are copied chunk by chunk into ``<table_name>_temp``,
    which then replaces the original table.

    WARNING: the rebuilt table contains only the compared columns. Columns
    left out of the comparison (for example BLOB columns when ``include_blob``
    is False) are not carried over, and neither are the constraints and
    indexes of the original table, because the copy is created with
    ``CREATE TABLE ... AS SELECT``.

    Parameters
    ----------
    lpath_db : str
        Path to the SQLite database file.
    table_name : str
        Name of the table to deduplicate.
    columns : Union[str, List[str]], optional
        Columns that define a duplicate. Default is "all".
    include_blob : bool, optional
        Whether BLOB columns take part when ``columns`` is "all".
        Default is False.
    chunk_size : int, optional
        Number of distinct rows copied per INSERT. Must be at least 1.
        Default is 10_000.
    dry_run : bool, optional
        If True (default), only count and report; the database is not
        modified.

    Returns
    -------
    Tuple[Optional[int], Optional[int]]
        ``(total_rows, total_duplicates)``, or ``(None, None)`` if an error
        occurred (the error is printed).
    """
    # Bound before the try so the finally clause can tell whether a
    # connection was ever opened.
    conn = None
    try:
        # LIMIT 0 would copy nothing (and the empty copy would replace the
        # table); a negative LIMIT means "unlimited" and would never finish.
        if chunk_size < 1:
            raise ValueError(
                f"chunk_size must be a positive integer, got {chunk_size}"
            )

        conn = sqlite3.connect(lpath_db)
        cursor = conn.cursor()

        # Vacuum the database to free up space
        if not dry_run:
            cursor.execute("VACUUM")
            conn.commit()

        columns = _determine_columns(cursor, table_name, columns, include_blob)
        columns_str = ", ".join(columns)

        total_processed = cursor.execute(
            f"SELECT COUNT(*) FROM {table_name}"
        ).fetchone()[0]

        if dry_run:
            # Read-only path: count instead of copying, so nothing is
            # created or changed in the database.
            total_unique = cursor.execute(
                f"SELECT COUNT(*) FROM (SELECT DISTINCT {columns_str} FROM {table_name})"
            ).fetchone()[0]
            total_duplicates = total_processed - total_unique

            print(f"[DRY RUN] Total rows: {total_processed}")
            print(f"[DRY RUN] Total unique rows: {total_unique}")
            print(f"[DRY RUN] Duplicates that would be removed: {total_duplicates}")

            return total_processed, total_duplicates

        # Create an empty temporary table with the selected columns
        temp_table = f"{table_name}_temp"
        cursor.execute(
            f"CREATE TABLE {temp_table} AS SELECT DISTINCT {columns_str} FROM {table_name} LIMIT 0"
        )

        offset = 0
        total_unique = 0

        try:
            # LIMIT/OFFSET apply to the DISTINCT result, so each pass copies
            # the next page of unique rows.
            while True:
                chunk_query = f"""
                    INSERT OR IGNORE INTO {temp_table}
                    SELECT DISTINCT {columns_str}
                    FROM {table_name}
                    LIMIT {chunk_size} OFFSET {offset}
                """
                cursor.execute(chunk_query)
                rows_affected = cursor.rowcount
                conn.commit()

                if rows_affected <= 0:
                    break

                total_unique += rows_affected
                offset += chunk_size

                print(f"Copied {total_unique} unique rows so far")
        except Exception:
            # Do not leave a half-filled temporary table behind; it would
            # make the next run fail on CREATE TABLE.
            conn.rollback()
            cursor.execute(f"DROP TABLE IF EXISTS {temp_table}")
            conn.commit()
            raise

        total_duplicates = total_processed - total_unique

        # Replace original table with the deduplicated one
        cursor.execute(f"DROP TABLE {table_name}")
        cursor.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}")
        conn.commit()

        print(f"Total rows processed: {total_processed}")
        print(f"Total unique rows: {total_unique}")
        print(f"Total duplicates removed: {total_duplicates}")

        return total_processed, total_duplicates

    except Exception as error:
        print(f"An error occurred: {error}")
        return None, None

    finally:
        if conn is not None:
            conn.close()
395
+
396
+
397
+ # EOF
scitex/db/_inspect.py ADDED
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Time-stamp: "2024-11-11 14:17:00 (ywatanabe)"
4
+ # File: ./scitex_repo/src/scitex/db/_inspect.py
5
+
6
+ import os
7
+ import sqlite3
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
10
+ #!/usr/bin/env python3
11
+ # -*- coding: utf-8 -*-
12
+ # Time-stamp: "2024-10-24 13:13:33 (ywatanabe)"
13
+ # /mnt/ssd/scitex_repo/src/scitex/db/_inspect.py
14
+
15
+
16
class Inspector:
    """Helper for examining the tables of an SQLite database file.

    Every method opens its own connection to ``db_path`` and closes it
    before returning.
    """

    def __init__(self, db_path: str):
        """Remember the database path.

        Args:
            db_path (str): Path to the SQLite database file

        Raises:
            FileNotFoundError: If ``db_path`` does not exist
        """
        if not os.path.exists(db_path):
            raise FileNotFoundError(f"Database file not found: {db_path}")
        self.db_path = db_path

    def _connect(self):
        """Open a connection that is closed when its ``with`` block exits.

        ``sqlite3.Connection`` used directly as a context manager only
        commits or rolls back; it does not close the connection.
        """
        from contextlib import closing

        return closing(sqlite3.connect(self.db_path))

    def get_table_names(self) -> List[str]:
        """Retrieves all table names from the database.

        Returns:
            List[str]: List of table names
        """
        with self._connect() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            return [table[0] for table in cursor.fetchall()]

    def get_table_info(
        self, table_name: str
    ) -> List[Tuple[int, str, str, int, Any, int, str]]:
        """Retrieves table structure information.

        Args:
            table_name (str): Name of the table

        Returns:
            List[Tuple[int, str, str, int, Any, int, str]]: One tuple per
                column: the ``PRAGMA table_info`` fields (cid, name, type,
                notnull, dflt_value, pk) followed by a space-separated
                constraint summary ("PRIMARY KEY", "NOT NULL", both, or "")
        """
        with self._connect() as conn:
            cursor = conn.cursor()
            cursor.execute(f"PRAGMA table_info({table_name})")
            columns = cursor.fetchall()

        enhanced_columns = []
        for col in columns:
            constraints = []
            # The pk field is the column's 1-based position in the primary
            # key, or 0 when it is not part of it. (index_list's third field
            # is the "unique" flag, so it cannot identify primary keys.)
            if col[5] > 0:
                constraints.append("PRIMARY KEY")
            if col[3] == 1:
                constraints.append("NOT NULL")
            enhanced_columns.append(col + (" ".join(constraints),))

        return enhanced_columns

    def get_sample_data(
        self, table_name: str, limit: int = 5
    ) -> Tuple[List[str], List[Tuple], int]:
        """Retrieves sample data from the specified table.

        Args:
            table_name (str): Name of the table
            limit (int, optional): Number of rows to retrieve. Defaults to 5.

        Returns:
            Tuple[List[str], List[Tuple], int]: Column names, sample data rows, and total row count
        """
        with self._connect() as conn:
            cursor = conn.cursor()
            cursor.execute(f"SELECT * FROM {table_name} LIMIT {limit}")
            columns = [description[0] for description in cursor.description]
            sample_data = cursor.fetchall()

            cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            total_rows = cursor.fetchone()[0]

        return columns, sample_data, total_rows

    def inspect(
        self,
        table_names: Optional[List[str]] = None,
        verbose=True,
    ) -> List[Any]:
        """Builds one sample DataFrame per table.

        Args:
            table_names (Optional[List[str]], optional): Tables to inspect.
                If None, all tables of the database are inspected.
            verbose (bool, optional): Not used by this method.

        Returns:
            List[Any]: One pandas DataFrame per table, holding the sampled
                rows with every value rendered as a string (bytes values are
                shown as "<BLOB>") and indexed by
                ("table_name", "n_total_rows")
        """
        import pandas as pd

        if table_names is None:
            table_names = self.get_table_names()

        data_tables = []
        for table_name in table_names:
            column_names, rows, total_rows = self.get_sample_data(table_name)

            records = [
                {
                    col: (str(value) if not isinstance(value, bytes) else "<BLOB>")
                    for col, value in zip(column_names, row)
                }
                for row in rows
            ]
            # Passing the column names explicitly keeps them for empty tables.
            sample_data = pd.DataFrame(records, columns=column_names)

            sample_data["table_name"] = table_name
            sample_data["n_total_rows"] = total_rows
            sample_data = sample_data.set_index(["table_name", "n_total_rows"])

            data_tables.append(sample_data)

        return data_tables
132
+
133
+
134
def inspect(
    lpath_db: str, table_names: Optional[List[str]] = None, verbose: bool = True
) -> List[Any]:
    """
    Inspects the specified SQLite database.

    Example:
        >>> inspect('path/to/database.db')
        >>> inspect('path/to/database.db', ['table1', 'table2'])

    Args:
        lpath_db (str): Path to the SQLite database file
        table_names (Optional[List[str]], optional): List of table names to inspect.
            If None, inspects all tables. Defaults to None.
        verbose (bool, optional): If True, prints each table overview.
            Defaults to True.

    Returns:
        List[Any]: The per-table overviews produced by ``Inspector.inspect``,
            one entry per inspected table

    Raises:
        FileNotFoundError: If ``lpath_db`` does not exist
    """
    inspector = Inspector(lpath_db)
    overviews_tables = inspector.inspect(table_names, verbose=verbose)
    if verbose:
        for dd in overviews_tables:
            print(f"\n{dd}\n")
    return overviews_tables
155
+
156
+
157
+ # python -c "import scitex; scitex.db.inspect(\"./data/db_all/Patient_23_005.db\")"
158
+ # python -c "import scitex; scitex.db.inspect(\"./data/db_all/Patient_23_005.db\", table_names=[\"eeg_data_reindexed\"])"
159
+ # python -c "import scitex; scitex.db.inspect(\"./data/db_all/Patient_23_005.db\", table_names=[\"eeg_data\"])"
160
+ # python -c "import scitex; scitex.db.inspect(\"./data/db_all/Patient_23_005.db\", table_names=[\"sqlite_sequence\"])"
161
+
162
+
163
+ # EOF
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env python3
2
+ """Scitex decorators module."""
3
+
4
+ from ._auto_order import AutoOrderDecorator, batch_fn, disable_auto_order, enable_auto_order, numpy_fn, pandas_fn, torch_fn
5
+ from ._batch_fn import batch_fn
6
+ from ._cache_disk import cache_disk
7
+ from ._cache_mem import cache_mem
8
+ from ._combined import batch_numpy_fn, batch_pandas_fn, batch_torch_fn, numpy_batch_fn, pandas_batch_fn, torch_batch_fn
9
+ from ._converters import ConversionWarning, is_cuda, is_nested_decorator, is_torch, to_numpy, to_torch
10
+ from ._deprecated import deprecated
11
+ from ._not_implemented import not_implemented
12
+ from ._numpy_fn import numpy_fn
13
+ from ._pandas_fn import pandas_fn
14
+ from ._preserve_doc import preserve_doc
15
+ from ._signal_fn import signal_fn
16
+ from ._timeout import timeout
17
+ from ._torch_fn import torch_fn
18
+ from ._wrap import wrap
19
+ from ._xarray_fn import xarray_fn
20
+
21
# Public API of the decorators package; each name is listed exactly once.
__all__ = [
    "AutoOrderDecorator",
    "ConversionWarning",
    "batch_fn",
    "batch_numpy_fn",
    "batch_pandas_fn",
    "batch_torch_fn",
    "cache_disk",
    "cache_mem",
    "deprecated",
    "disable_auto_order",
    "enable_auto_order",
    "is_cuda",
    "is_nested_decorator",
    "is_torch",
    "not_implemented",
    "numpy_batch_fn",
    "numpy_fn",
    "pandas_batch_fn",
    "pandas_fn",
    "preserve_doc",
    "signal_fn",
    "timeout",
    "to_numpy",
    "to_torch",
    "torch_batch_fn",
    "torch_fn",
    "wrap",
    "xarray_fn",
]