scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
scitex/db/_SQLite3.py DELETED
@@ -1,2136 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # Time-stamp: "2024-11-25 06:08:26 (ywatanabe)"
4
- # File: ./scitex_repo/src/scitex/db/_SQLite3.py
5
-
6
- THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/db/_SQLite3.py"
7
-
8
- from typing import List, Optional
9
-
10
- from ..str import printc as _printc
11
- from ._SQLite3Mixins._BatchMixin import _BatchMixin
12
- from ._SQLite3Mixins._BlobMixin import _BlobMixin
13
- from ._SQLite3Mixins._ConnectionMixin import _ConnectionMixin
14
- from ._SQLite3Mixins._ImportExportMixin import _ImportExportMixin
15
- from ._SQLite3Mixins._IndexMixin import _IndexMixin
16
- from ._SQLite3Mixins._MaintenanceMixin import _MaintenanceMixin
17
- from ._SQLite3Mixins._QueryMixin import _QueryMixin
18
- from ._SQLite3Mixins._RowMixin import _RowMixin
19
- from ._SQLite3Mixins._TableMixin import _TableMixin
20
- from ._SQLite3Mixins._TransactionMixin import _TransactionMixin
21
-
22
-
23
- class SQLite3(
24
- _ConnectionMixin,
25
- _QueryMixin,
26
- _TransactionMixin,
27
- _TableMixin,
28
- _IndexMixin,
29
- _RowMixin,
30
- _BatchMixin,
31
- _BlobMixin,
32
- _ImportExportMixin,
33
- _MaintenanceMixin,
34
- ):
35
- """Comprehensive SQLite database management class."""
36
-
37
- def __init__(self, db_path: str, use_temp: bool = False):
38
- """Initializes database with option for temporary copy."""
39
- _ConnectionMixin.__init__(self, db_path, use_temp)
40
-
41
- def __call__(
42
- self,
43
- return_summary=False,
44
- print_summary=True,
45
- table_names: Optional[List[str]] = None,
46
- verbose: bool = True,
47
- limit: int = 5,
48
- ):
49
- summary = self.get_summaries(
50
- table_names=table_names,
51
- verbose=verbose,
52
- limit=limit,
53
- )
54
-
55
- if print_summary:
56
- for k, v in summary.items():
57
- _printc(f"{k}\n{v}")
58
-
59
- if return_summary:
60
- return summary
61
-
62
- @property
63
- def summary(self):
64
- self()
65
-
66
-
67
- BaseSQLiteDB = SQLite3
68
-
69
- # #!/usr/bin/env python3
70
- # # -*- coding: utf-8 -*-
71
- # # Time-stamp: "2024-11-11 13:48:57 (ywatanabe)"
72
- # # File: ./scitex_repo/src/scitex/db/_BaseSQLiteDB.py
73
-
74
- # """
75
- # BaseSQLiteDB: Comprehensive SQLite Database Management Class
76
-
77
- # Features:
78
- # - Basic database operations (connect, execute, close)
79
- # - Transaction management
80
- # - Table operations (create, drop, modify)
81
- # - Index management
82
- # - Row operations (CRUD)
83
- # - BLOB data handling
84
- # - Batch operations
85
- # - Foreign key constraints
86
- # - Import/Export capabilities (CSV)
87
- # - Database maintenance and optimization
88
-
89
- # Dependencies:
90
- # - sqlite3: Core database operations
91
- # - pandas: Data manipulation and CSV handling
92
- # - numpy: BLOB data processing
93
- # """
94
-
95
- # """Imports"""
96
- # from typing import Any, Callable, Dict, List, Optional, Tuple, Union
97
- # import contextlib
98
- # import os
99
- # import sqlite3
100
- # import threading
101
- # import numpy as np
102
- # import pandas as pd
103
- # from ..str.__printc import _printc
104
-
105
- # """Parameters"""
106
- # # CONFIG = scitex.gen.load_configs()
107
-
108
- # """Functions & Classes"""
109
-
110
- # class BaseSQLiteDB:
111
- # """Comprehensive SQLite database management class providing:
112
- # - Basic operations (connect, execute, close)
113
- # - Transaction and batch operations
114
- # - Table and index management
115
- # - Row operations (CRUD)
116
- # - BLOB data handling
117
- # - Import/Export capabilities
118
- # - Database maintenance
119
- # """
120
-
121
- # # ----------------------------------------
122
- # # Basic Database Operations
123
- # # ----------------------------------------
124
- # def __init__(self, db_path: str):
125
- # """Initializes SQLite database connection.
126
-
127
- # Parameters
128
- # ----------
129
- # db_path : str
130
- # Path to SQLite database file
131
-
132
- # Raises
133
- # ------
134
- # sqlite3.Error
135
- # If database connection fails
136
- # """
137
- # self.lock = threading.Lock()
138
- # self._maintenance_lock = threading.Lock()
139
- # self.conn: Optional[sqlite3.Connection] = None
140
- # self.cursor: Optional[sqlite3.Cursor] = None
141
- # self.db_path = db_path
142
- # if db_path:
143
- # self.connect(db_path)
144
- # # self._initialize_writable_state()
145
-
146
- # # if writable:
147
- # # self.writable = True
148
- # # else:
149
- # # self.writable = False
150
-
151
- # def __call__(
152
- # self,
153
- # return_summary=False,
154
- # print_summary=True,
155
- # table_names: Optional[List[str]] = None,
156
- # verbose: bool = True,
157
- # limit: int = 5,
158
- # ):
159
- # summary = self.get_summaries(
160
- # table_names=table_names,
161
- # verbose=verbose,
162
- # limit=limit,
163
- # )
164
-
165
- # if print_summary:
166
- # for k, v in summary.items():
167
- # _printc(f"{k}\n{v}")
168
-
169
- # if return_summary:
170
- # return summary
171
-
172
- # @contextlib.contextmanager
173
- # def transaction(self):
174
- # """Safe transaction context manager"""
175
- # try:
176
- # self.begin()
177
- # yield
178
- # self.commit()
179
- # except Exception as e:
180
- # self.rollback()
181
- # raise e
182
-
183
- # # ----------------------------------------
184
- # # Writable states
185
- # # ----------------------------------------
186
- # # def _initialize_writable_state(self) -> None:
187
- # # """Initializes writable state table and protects it."""
188
- # # try:
189
- # # # Create state table without protection by default
190
- # # self.execute(
191
- # # """
192
- # # CREATE TABLE IF NOT EXISTS _db_state (
193
- # # key TEXT PRIMARY KEY,
194
- # # value TEXT,
195
- # # protected INTEGER DEFAULT 0
196
- # # )
197
- # # """
198
- # # )
199
-
200
- # # # Initialize state
201
- # # self.execute(
202
- # # """
203
- # # INSERT OR IGNORE INTO _db_state (key, value, protected)
204
- # # VALUES ('writable', 'true', 0)
205
- # # """
206
- # # )
207
- # # except sqlite3.Error as err:
208
- # # raise ValueError(f"Failed to initialize writable state: {err}")
209
-
210
- # # def _initialize_writable_state(self) -> None:
211
- # # """Initializes writable state table and protects it."""
212
- # # try:
213
- # # # Create state table
214
- # # self.execute(
215
- # # """
216
- # # CREATE TABLE IF NOT EXISTS _db_state (
217
- # # key TEXT PRIMARY KEY,
218
- # # value TEXT,
219
- # # protected INTEGER DEFAULT 1
220
- # # )
221
- # # """
222
- # # )
223
-
224
- # # # Create trigger
225
- # # self.execute(
226
- # # """
227
- # # CREATE TRIGGER IF NOT EXISTS protect_db_state
228
- # # BEFORE UPDATE ON _db_state
229
- # # BEGIN
230
- # # SELECT CASE
231
- # # WHEN OLD.protected = 1 THEN
232
- # # RAISE(ABORT, 'Cannot modify protected state')
233
- # # END;
234
- # # END;
235
- # # """
236
- # # )
237
-
238
- # # # Initialize state
239
- # # self.execute(
240
- # # """
241
- # # INSERT OR IGNORE INTO _db_state (key, value, protected)
242
- # # VALUES ('writable', 'true', 1)
243
- # # """
244
- # # )
245
- # # except sqlite3.Error as err:
246
- # # raise ValueError(f"Failed to initialize writable state: {err}")
247
-
248
- # @property
249
- # def writable(self) -> bool:
250
- # """Gets database writable state from internal table."""
251
- # try:
252
- # self.cursor.execute(
253
- # "SELECT value FROM _db_state WHERE key = 'writable'"
254
- # )
255
- # result = self.cursor.fetchone()
256
- # return result[0].lower() == "true" if result else True
257
- # except sqlite3.Error:
258
- # return True
259
-
260
- # @writable.setter
261
- # def writable(self, state: bool) -> None:
262
- # """Sets database writable state with proper authorization."""
263
- # try:
264
- # # Temporarily disable protection
265
- # self.execute(
266
- # "UPDATE _db_state SET protected = 0 WHERE key = 'writable'"
267
- # )
268
- # # Update state
269
- # self.execute(
270
- # "UPDATE _db_state SET value = ? WHERE key = 'writable'",
271
- # (str(state).lower(),),
272
- # )
273
- # # Re-enable protection
274
- # self.execute(
275
- # "UPDATE _db_state SET protected = 1 WHERE key = 'writable'"
276
- # )
277
- # self.execute("PRAGMA query_only = ?", (not state,))
278
- # except sqlite3.Error as err:
279
- # raise ValueError(f"Failed to set writable state: {err}")
280
-
281
- # def _check_writable(self) -> None:
282
- # """Verifies database is writable before write operations."""
283
- # if not self.writable:
284
- # raise ValueError("Database is in read-only mode")
285
-
286
- # # ----------------------------------------
287
- # # Connection
288
- # # ----------------------------------------
289
- # def __enter__(self):
290
- # return self
291
-
292
- # def __exit__(self, exc_type, exc_val, exc_tb):
293
- # self.close()
294
-
295
- # def connect(self, db_path: str) -> None:
296
- # """Establishes connection to SQLite database.
297
-
298
- # Example
299
- # -------
300
- # >>> db = BaseSQLiteDB('example.db')
301
- # >>> db.connect('new_database.db')
302
-
303
- # Parameters
304
- # ----------
305
- # db_path : str
306
- # Path to SQLite database file
307
-
308
- # Raises
309
- # ------
310
- # sqlite3.Error
311
- # If connection fails
312
- # """
313
-
314
- # if self.conn:
315
- # self.close()
316
- # self.conn = sqlite3.connect(db_path)
317
- # self.cursor = self.conn.cursor()
318
-
319
- # def close(self) -> None:
320
- # """Closes database connection and cursor.
321
-
322
- # Example
323
- # -------
324
- # >>> db = BaseSQLiteDB('example.db')
325
- # >>> db.close()
326
- # """
327
- # if self.cursor:
328
- # self.cursor.close()
329
- # if self.conn:
330
- # self.conn.close()
331
- # self.cursor = None
332
- # self.conn = None
333
-
334
- # def reconnect(self) -> None:
335
- # """Reestablishes database connection.
336
-
337
- # Example
338
- # -------
339
- # >>> db = BaseSQLiteDB('example.db')
340
- # >>> db.close()
341
- # >>> db.reconnect()
342
-
343
- # Raises
344
- # ------
345
- # ValueError
346
- # If no database path was previously specified
347
- # sqlite3.Error
348
- # If connection fails
349
- # """
350
- # if self.db_path:
351
- # self.connect(self.db_path)
352
- # else:
353
- # raise ValueError("No database path specified for reconnection")
354
-
355
- # def execute(self, query: str, parameters: Tuple = ()) -> None:
356
- # """Executes single SQL query with optional parameters.
357
-
358
- # Example
359
- # -------
360
- # >>> db.execute("INSERT INTO users (name) VALUES (?)", ("John",))
361
- # >>> db.execute("SELECT * FROM users WHERE age > ?", (25,))
362
- # >>> db.execute("CREATE TABLE IF NOT EXISTS test (id INTEGER PRIMARY KEY)")
363
-
364
- # Parameters
365
- # ----------
366
- # query : str
367
- # SQL query to execute
368
- # parameters : Tuple, optional
369
- # Query parameters to substitute
370
-
371
- # Raises
372
- # ------
373
- # ConnectionError
374
- # If database is not connected
375
- # ValueError
376
- # If database is in read-only mode for write operations
377
- # sqlite3.Error
378
- # If query execution fails
379
- # """
380
- # if not self.cursor:
381
- # raise ConnectionError("Database not connected")
382
-
383
- # # Check if operation requires write permission
384
- # if any(
385
- # keyword in query.upper()
386
- # for keyword in [
387
- # "INSERT",
388
- # "UPDATE",
389
- # "DELETE",
390
- # "DROP",
391
- # "CREATE",
392
- # "ALTER",
393
- # ]
394
- # ):
395
- # self._check_writable()
396
-
397
- # try:
398
- # self.cursor.execute(query, parameters)
399
- # self.conn.commit()
400
- # return self.cursor
401
- # except sqlite3.Error as err:
402
- # raise sqlite3.Error(f"Query execution failed: {err}")
403
-
404
- # def executemany(self, query: str, parameters: List[Tuple]) -> None:
405
- # """Executes batch SQL query with multiple parameter sets.
406
-
407
- # Example
408
- # -------
409
- # >>> params = [("John", 30), ("Jane", 25)]
410
- # >>> db.executemany("INSERT INTO users (name, age) VALUES (?, ?)", params)
411
-
412
- # Parameters
413
- # ----------
414
- # query : str
415
- # SQL query to execute
416
- # parameters : List[Tuple]
417
- # List of parameter tuples for batch execution
418
-
419
- # Raises
420
- # ------
421
- # ConnectionError
422
- # If database is not connected
423
- # ValueError
424
- # If database is in read-only mode for write operations
425
- # sqlite3.Error
426
- # If batch execution fails
427
- # """
428
- # if not self.cursor:
429
- # raise ConnectionError("Database not connected")
430
-
431
- # if any(
432
- # keyword in query.upper()
433
- # for keyword in [
434
- # "INSERT",
435
- # "UPDATE",
436
- # "DELETE",
437
- # "DROP",
438
- # "CREATE",
439
- # "ALTER",
440
- # ]
441
- # ):
442
- # self._check_writable()
443
-
444
- # try:
445
- # self.cursor.executemany(query, parameters)
446
- # self.conn.commit()
447
- # except sqlite3.Error as err:
448
- # raise sqlite3.Error(f"Batch query execution failed: {err}")
449
-
450
- # def executescript(self, script: str) -> None:
451
- # """Executes multiple SQL statements at once.
452
-
453
- # Parameters
454
- # ----------
455
- # script : str
456
- # Multiple SQL statements separated by semicolons
457
-
458
- # Raises
459
- # ------
460
- # ConnectionError
461
- # If database is not connected
462
- # ValueError
463
- # If database is in read-only mode
464
- # sqlite3.Error
465
- # If script execution fails
466
- # """
467
- # if not self.cursor:
468
- # raise ConnectionError("Database not connected")
469
-
470
- # if any(
471
- # keyword in script.upper()
472
- # for keyword in [
473
- # "INSERT",
474
- # "UPDATE",
475
- # "DELETE",
476
- # "DROP",
477
- # "CREATE",
478
- # "ALTER",
479
- # ]
480
- # ):
481
- # self._check_writable()
482
-
483
- # try:
484
- # self.cursor.executescript(script)
485
- # self.conn.commit()
486
- # except sqlite3.Error as err:
487
- # raise sqlite3.Error(f"Script execution failed: {err}")
488
-
489
- # # ----------------------------------------
490
- # # Transaction Management
491
- # # ----------------------------------------
492
- # def begin(self) -> None:
493
- # """Starts a new database transaction.
494
-
495
- # Example
496
- # -------
497
- # >>> db.begin()
498
- # >>> try:
499
- # ... db.execute("INSERT INTO users (name) VALUES (?)", ("John",))
500
- # ... db.commit_transaction()
501
- # ... except:
502
- # ... db.rollback_transaction()
503
-
504
- # Raises
505
- # ------
506
- # sqlite3.Error
507
- # If transaction start fails
508
- # """
509
- # self.execute("BEGIN TRANSACTION")
510
-
511
- # def commit(self) -> None:
512
- # """Commits pending transaction changes.
513
-
514
- # Example
515
- # -------
516
- # >>> db.begin()
517
- # >>> db.execute("INSERT INTO users (name) VALUES (?)", ("John",))
518
- # >>> db.commit()
519
-
520
- # Raises
521
- # ------
522
- # sqlite3.Error
523
- # If commit fails
524
- # """
525
- # self.conn.commit()
526
-
527
- # def rollback(self) -> None:
528
- # """Reverts changes from current transaction.
529
-
530
- # Example
531
- # -------
532
- # >>> db.begin()
533
- # >>> try:
534
- # ... db.execute("Invalid SQL")
535
- # ... except:
536
- # ... db.rollback()
537
-
538
- # Raises
539
- # ------
540
- # sqlite3.Error
541
- # If rollback fails
542
- # """
543
- # self.conn.rollback()
544
-
545
- # # ----------------------------------------
546
- # # Foreign Key Management
547
- # # ----------------------------------------
548
- # def enable_foreign_keys(self) -> None:
549
- # """Enables foreign key constraint checking.
550
-
551
- # Example
552
- # -------
553
- # >>> db.enable_foreign_keys()
554
- # >>> db.execute("CREATE TABLE users (id INTEGER PRIMARY KEY)")
555
- # >>> db.execute('''CREATE TABLE posts
556
- # ... (id INTEGER PRIMARY KEY,
557
- # ... user_id INTEGER,
558
- # ... FOREIGN KEY(user_id) REFERENCES users(id))''')
559
-
560
- # Raises
561
- # ------
562
- # sqlite3.Error
563
- # If PRAGMA command fails
564
- # """
565
- # self.execute("PRAGMA foreign_keys = ON")
566
-
567
- # def disable_foreign_keys(self) -> None:
568
- # """Disables foreign key constraint checking.
569
-
570
- # Example
571
- # -------
572
- # >>> db.disable_foreign_keys()
573
- # >>> # Now foreign key constraints won't be enforced
574
-
575
- # Raises
576
- # ------
577
- # sqlite3.Error
578
- # If PRAGMA command fails
579
- # """
580
- # self.execute("PRAGMA foreign_keys = OFF")
581
-
582
- # # ----------------------------------------
583
- # # Index Management
584
- # # ----------------------------------------
585
- # def create_index(
586
- # self,
587
- # table_name: str,
588
- # column_names: List[str],
589
- # index_name: str = None,
590
- # unique: bool = False,
591
- # ) -> None:
592
- # """Creates an index on specified table columns for improved query performance.
593
-
594
- # Parameters
595
- # ----------
596
- # table_name : str
597
- # Name of table to create index on
598
- # column_names : List[str]
599
- # List of columns to include in index
600
- # index_name : str, optional
601
- # Custom name for index. If None, auto-generated from table and column names
602
- # unique : bool, optional
603
- # Whether to create a unique index (default False)
604
-
605
- # Example
606
- # -------
607
- # >>> db.create_index('users', ['email'], unique=True)
608
- # >>> db.create_index('posts', ['user_id', 'created_at'])
609
-
610
- # Raises
611
- # ------
612
- # sqlite3.Error
613
- # If index creation fails
614
- # """
615
- # if index_name is None:
616
- # index_name = f"idx_{table_name}_{'_'.join(column_names)}"
617
- # unique_clause = "UNIQUE" if unique else ""
618
- # query = f"CREATE {unique_clause} INDEX IF NOT EXISTS {index_name} ON {table_name} ({','.join(column_names)})"
619
- # self.execute(query)
620
-
621
- # def drop_index(self, index_name: str) -> None:
622
- # """Drops an existing database index.
623
-
624
- # Parameters
625
- # ----------
626
- # index_name : str
627
- # Name of index to drop
628
-
629
- # Raises
630
- # ------
631
- # sqlite3.Error
632
- # If index drop fails
633
- # """
634
- # self.execute(f"DROP INDEX IF EXISTS {index_name}")
635
-
636
- # # ----------------------------------------
637
- # # Table Management
638
- # # ----------------------------------------
639
- # def create_table(
640
- # self,
641
- # table_name: str,
642
- # columns: Dict[str, str],
643
- # foreign_keys: List[Dict[str, str]] = None,
644
- # if_not_exists: bool = True,
645
- # ) -> None:
646
- # with self.transaction():
647
- # try:
648
- # exists_clause = "IF NOT EXISTS " if if_not_exists else ""
649
- # column_defs = []
650
-
651
- # for col_name, col_type in columns.items():
652
- # column_defs.append(f"{col_name} {col_type}")
653
- # if "BLOB" in col_type.upper():
654
- # column_defs.extend(
655
- # [
656
- # f"{col_name}_dtype TEXT DEFAULT 'unknown'",
657
- # f"{col_name}_shape TEXT DEFAULT 'unknown'",
658
- # ]
659
- # )
660
-
661
- # # Add foreign key constraints
662
- # if foreign_keys:
663
- # for fk in foreign_keys:
664
- # column_defs.append(
665
- # f"FOREIGN KEY ({fk['tgt_column']}) REFERENCES {fk['src_table']}({fk['src_column']})"
666
- # )
667
-
668
- # query = f"CREATE TABLE {exists_clause}{table_name} ({', '.join(column_defs)})"
669
- # self.execute(query)
670
-
671
- # except sqlite3.Error as err:
672
- # raise ValueError(f"Failed to create table {table_name}: {err}")
673
-
674
- # # def create_table(
675
- # # self,
676
- # # table_name: str,
677
- # # columns: Dict[str, str],
678
- # # foreign_keys: Dict[str, Union[str, List[str]]] = None,
679
- # # if_not_exists: bool = True,
680
- # # ) -> None:
681
- # # """Creates new table with metadata columns for BLOB types and foreign keys.
682
-
683
- # # Parameters
684
- # # ----------
685
- # # table_name : str
686
- # # Name of table to create
687
- # # columns : Dict[str, str]
688
- # # Column definitions with names as keys and SQL types as values
689
- # # foreign_keys : Dict[str, Union[str, List[str]]], optional
690
- # # Foreign key definitions as {table_name: column_name(s)}
691
- # # if_not_exists : bool, optional
692
- # # Whether to use IF NOT EXISTS clause (default True)
693
-
694
- # # Example
695
- # # -------
696
- # # >>> columns = {
697
- # # ... 'id': 'INTEGER PRIMARY KEY',
698
- # # ... 'name': 'TEXT',
699
- # # ... 'data': 'BLOB'
700
- # # ... }
701
- # # >>> foreign_keys = {'parent_table': ['id', 'name']}
702
- # # >>> db.create_table('measurements', columns, foreign_keys)
703
- # # """
704
- # # try:
705
- # # # Create base table
706
- # # exists_clause = "IF NOT EXISTS " if if_not_exists else ""
707
- # # column_defs = []
708
-
709
- # # for col_name, col_type in columns.items():
710
- # # column_defs.append(f"{col_name} {col_type}")
711
- # # if "BLOB" in col_type.upper():
712
- # # column_defs.extend([
713
- # # f"{col_name}_dtype TEXT DEFAULT 'unknown'",
714
- # # f"{col_name}_shape TEXT DEFAULT 'unknown'"
715
- # # ])
716
-
717
- # # # Add foreign key constraints
718
- # # if foreign_keys:
719
- # # for src_table, columns in foreign_keys.items():
720
- # # if isinstance(columns, str):
721
- # # columns = [columns]
722
- # # for column in columns:
723
- # # column_defs.append(
724
- # # f"FOREIGN KEY ({column}) REFERENCES {src_table}({column})"
725
- # # )
726
-
727
- # # query = f"CREATE TABLE {exists_clause}{table_name} ({', '.join(column_defs)})"
728
- # # self.execute(query)
729
-
730
- # # except sqlite3.Error as err:
731
- # # raise ValueError(f"Failed to create table {table_name}: {err}")
732
-
733
- # def drop_table(self, table_name: str, if_exists: bool = True) -> None:
734
- # """Drops a table from the database.
735
-
736
- # Parameters
737
- # ----------
738
- # table_name : str
739
- # Name of table to drop
740
- # if_exists : bool, optional
741
- # Whether to ignore if table doesn't exist (default True)
742
-
743
- # Example
744
- # -------
745
- # >>> db.drop_table('temporary_table')
746
- # >>> db.drop_table('users', if_exists=False) # Raises error if table doesn't exist
747
-
748
- # Raises
749
- # ------
750
- # ValueError
751
- # If table drop fails
752
- # sqlite3.Error
753
- # If SQL execution fails
754
- # """
755
- # with self.transaction():
756
- # try:
757
- # exists_clause = "IF EXISTS " if if_exists else ""
758
- # query = f"DROP TABLE {exists_clause}{table_name}"
759
- # self.execute(query)
760
- # except sqlite3.Error as err:
761
- # raise ValueError(f"Failed to drop table: {err}")
762
-
763
- # def rename_table(self, old_name: str, new_name: str) -> None:
764
- # """Renames a table in the database.
765
-
766
- # Parameters
767
- # ----------
768
- # old_name : str
769
- # Current name of the table
770
- # new_name : str
771
- # New name for the table
772
-
773
- # Raises
774
- # ------
775
- # ValueError
776
- # If table rename fails
777
- # sqlite3.Error
778
- # If SQL execution fails
779
- # """
780
- # with self.transaction():
781
- # try:
782
- # query = f"ALTER TABLE {old_name} RENAME TO {new_name}"
783
- # self.execute(query)
784
- # except sqlite3.Error as err:
785
- # raise ValueError(f"Failed to rename table: {err}")
786
-
787
- # def add_columns(
788
- # self,
789
- # table_name: str,
790
- # columns: Dict[str, str],
791
- # default_values: Dict[str, Any] = None,
792
- # ) -> None:
793
- # """Adds multiple columns to an existing table.
794
-
795
- # Parameters
796
- # ----------
797
- # table_name : str
798
- # Name of table to modify
799
- # columns : Dict[str, str]
800
- # Dictionary mapping column names to their SQL types
801
- # default_values : Dict[str, Any], optional
802
- # Dictionary mapping column names to their default values
803
-
804
- # Example
805
- # -------
806
- # >>> columns = {"age": "INTEGER", "status": "TEXT"}
807
- # >>> default_values = {"age": 0, "status": "'draft'"}
808
- # >>> db.add_columns("users", columns, default_values)
809
- # """
810
- # with self.transaction():
811
- # if default_values is None:
812
- # default_values = {}
813
-
814
- # for column_name, column_type in columns.items():
815
- # self.add_column(
816
- # table_name,
817
- # column_name,
818
- # column_type,
819
- # default_values.get(column_name),
820
- # )
821
-
822
- # def add_column(
823
- # self,
824
- # table_name: str,
825
- # column_name: str,
826
- # column_type: str,
827
- # default_value: Any = None,
828
- # ) -> None:
829
- # """Adds a new column to an existing table if it doesn't exist."""
830
- # # Check if column exists using get_table_schema
831
- # with self.transaction():
832
- # schema = self.get_table_schema(table_name)
833
- # if column_name in schema["name"].values:
834
- # return
835
-
836
- # try:
837
- # query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
838
- # if default_value is not None:
839
- # query += f" DEFAULT {default_value}"
840
- # self.execute(query)
841
-
842
- # # Add metadata columns if BLOB type
843
- # if "BLOB" in column_type.upper():
844
- # self.add_column(
845
- # table_name,
846
- # f"{column_name}_dtype",
847
- # "TEXT",
848
- # default_value="'unknown'",
849
- # )
850
- # self.add_column(
851
- # table_name,
852
- # f"{column_name}_shape",
853
- # "TEXT",
854
- # default_value="'unknown'",
855
- # )
856
-
857
- # except sqlite3.OperationalError as err:
858
- # raise ValueError(f"Failed to add column: {err}")
859
-
860
- # # def drop_columns(self, table_name: str, columns: Union[str, List[str]], if_exists: bool = True) -> None:
861
- # # """Drops columns more efficiently using SQLite's ALTER TABLE."""
862
- # # if isinstance(columns, str):
863
- # # columns = [columns]
864
-
865
- # # # Get existing columns
866
- # # schema = self.get_table_schema(table_name)
867
- # # existing_columns = schema['name'].values
868
-
869
- # # # Filter to only existing columns if if_exists=True
870
- # # columns_to_drop = [col for col in columns if col in existing_columns] if if_exists else columns
871
-
872
- # # if not columns_to_drop:
873
- # # return
874
-
875
- # # try:
876
- # # for column in columns_to_drop:
877
- # # self.execute(f"ALTER TABLE {table_name} DROP COLUMN {column}")
878
- # # except sqlite3.Error as err:
879
- # # raise ValueError(f"Failed to drop columns: {err}")
880
-
881
- # def drop_columns(
882
- # self,
883
- # table_name: str,
884
- # columns: Union[str, List[str]],
885
- # if_exists: bool = True,
886
- # ) -> None:
887
- # with self.transaction():
888
- # if isinstance(columns, str):
889
- # columns = [columns]
890
- # schema = self.get_table_schema(table_name)
891
- # existing_columns = schema["name"].values
892
- # columns_to_drop = (
893
- # [col for col in columns if col in existing_columns]
894
- # if if_exists
895
- # else columns
896
- # )
897
-
898
- # if not columns_to_drop:
899
- # return
900
-
901
- # for column in columns_to_drop:
902
- # self.execute(f"ALTER TABLE {table_name} DROP COLUMN {column}")
903
-
904
- # def add_foreign_columns(
905
- # self,
906
- # tgt_table: str,
907
- # foreign_keys: Dict[str, Union[str, List[str]]],
908
- # default_value: Any = None,
909
- # ) -> None:
910
- # """Adds new foreign key columns to an existing table.
911
-
912
- # Parameters
913
- # ----------
914
- # tgt_table : str
915
- # Name of target table to modify
916
- # foreign_keys : Dict[str, Union[str, List[str]]]
917
- # Dictionary mapping source tables to their column(s)
918
- # Format: {source_table: column_name or [column_names]}
919
- # default_value : Any, optional
920
- # Default value for existing rows
921
-
922
- # Example
923
- # -------
924
- # >>> foreign_keys = {"id_manager": ["id", "patient_id"]}
925
- # >>> db.add_foreign_column("time_table", foreign_keys)
926
- # """
927
- # with self.transaction():
928
- # for src_table, columns in foreign_keys.items():
929
- # if isinstance(columns, str):
930
- # columns = [columns]
931
-
932
- # for column in columns:
933
- # temp_table = f"{tgt_table}_temp"
934
- # self._add_single_foreign_column(
935
- # tgt_table, temp_table, src_table, column, default_value
936
- # )
937
-
938
- # # def add_foreign_columns(
939
- # # self,
940
- # # tgt_table: str,
941
- # # foreign_keys: Dict[str, Union[str, List[str]]],
942
- # # default_value: Any = None,
943
- # # ) -> None:
944
- # # """Adds new foreign key columns to an existing table.
945
-
946
- # # Parameters
947
- # # ----------
948
- # # tgt_table : str
949
- # # Name of target table to modify
950
- # # foreign_keys : Dict[str, Union[str, List[str]]]
951
- # # Dictionary mapping source tables to their column(s)
952
- # # Format: {source_table: column_name or [column_names]}
953
- # # default_value : Any, optional
954
- # # Default value for existing rows
955
-
956
- # # Example
957
- # # -------
958
- # # >>> foreign_keys = {"id_manager": ["id", "patient_id"]}
959
- # # >>> db.add_foreign_column("time_table", foreign_keys)
960
- # # """
961
- # # try:
962
- # # for src_table, columns in foreign_keys.items():
963
- # # if isinstance(columns, str):
964
- # # columns = [columns]
965
-
966
- # # for column in columns:
967
- # # # Verify tables and columns exist
968
- # # src_schema = self.get_table_schema(src_table)
969
- # # if column not in src_schema:
970
- # # raise ValueError(
971
- # # f"Column {column} not found in {src_table}"
972
- # # )
973
-
974
- # # current_schema = self.get_table_schema(tgt_table)
975
- # # if column in current_schema:
976
- # # raise ValueError(
977
- # # f"Column {column} already exists in {tgt_table}"
978
- # # )
979
-
980
- # # # Get column type from source table
981
- # # column_type = src_schema[column]
982
-
983
- # # # Create new table schema with foreign key
984
- # # new_columns = {column: column_type}
985
- # # new_columns.update(current_schema)
986
-
987
- # # # Enable foreign keys
988
- # # self.execute("PRAGMA foreign_keys=ON")
989
-
990
- # # # Create temp table and copy data
991
- # # temp_table = f"{tgt_table}_temp"
992
- # # self.create_table(temp_table, new_columns)
993
-
994
- # # # Copy existing data
995
- # # old_cols = ", ".join(current_schema.keys())
996
- # # self.execute(
997
- # # f"INSERT INTO {temp_table} ({old_cols}) SELECT {old_cols} FROM {tgt_table}"
998
- # # )
999
-
1000
- # # # Drop old table and rename new one
1001
- # # self.execute(f"DROP TABLE {tgt_table}")
1002
- # # self.execute(
1003
- # # f"ALTER TABLE {temp_table} RENAME TO {tgt_table}"
1004
- # # )
1005
-
1006
- # # # Add foreign key constraint
1007
- # # query = (
1008
- # # f"ALTER TABLE {tgt_table} ADD FOREIGN KEY ({column}) "
1009
- # # f"REFERENCES {src_table}({column})"
1010
- # # )
1011
- # # self.execute(query)
1012
-
1013
- # # # Update with default value if provided
1014
- # # if default_value is not None:
1015
- # # self.execute(
1016
- # # f"UPDATE {tgt_table} SET {column} = ?",
1017
- # # (default_value,),
1018
- # # )
1019
-
1020
- # # except sqlite3.Error as err:
1021
- # # raise ValueError(f"Failed to add foreign key column: {err}")
1022
-
1023
- # def get_table_names(self) -> List[str]:
1024
- # """Lists all tables in the database.
1025
-
1026
- # Returns
1027
- # -------
1028
- # List[str]
1029
- # Names of all tables in database
1030
-
1031
- # Example
1032
- # -------
1033
- # >>> tables = db.get_table_names()
1034
- # >>> print(tables) # ['users', 'posts', ...]
1035
- # """
1036
- # query = "SELECT name FROM sqlite_master WHERE type='table'"
1037
- # self.cursor.execute(query)
1038
- # return [table[0] for table in self.cursor.fetchall()]
1039
-
1040
- # def get_table_schema(self, table_name: str) -> pd.DataFrame:
1041
- # """Retrieves schema information for specified table.
1042
-
1043
- # Example
1044
- # -------
1045
- # >>> db = BaseSQLiteDB('example.db')
1046
- # >>> info = db.get_table_schema('users')
1047
- # >>> print(info) # Shows column details
1048
-
1049
- # Parameters
1050
- # ----------
1051
- # table_name : str
1052
- # Name of table to analyze
1053
-
1054
- # Returns
1055
- # -------
1056
- # pd.DataFrame
1057
- # DataFrame containing:
1058
- # - cid: Column ID
1059
- # - name: Column name
1060
- # - type: Data type
1061
- # - notnull: NOT NULL constraint
1062
- # - dflt_value: Default value
1063
- # - pk: Primary key flag
1064
- # """
1065
- # query = f"PRAGMA table_info({table_name})"
1066
- # self.cursor.execute(query)
1067
- # columns = ["cid", "name", "type", "notnull", "dflt_value", "pk"]
1068
- # return pd.DataFrame(self.cursor.fetchall(), columns=columns)
1069
-
1070
- # def get_primary_key(self, table_name: str) -> str:
1071
- # schema = self.get_table_schema(table_name)
1072
- # pk_col = schema[schema["pk"] == 1]["name"].values
1073
- # return pk_col[0] if len(pk_col) > 0 else None
1074
-
1075
- # def get_table_stats(self, table_name: str) -> Dict[str, int]:
1076
- # """Retrieves size statistics for a specified database table.
1077
-
1078
- # Example
1079
- # -------
1080
- # >>> db = BaseSQLiteDB('example.db')
1081
- # >>> stats = db.get_table_stats('users')
1082
- # >>> print(stats)
1083
- # {
1084
- # 'pages': 10,
1085
- # 'page_size': 4096,
1086
- # 'total_size': 40960,
1087
- # 'row_count': 1000
1088
- # }
1089
-
1090
- # Parameters
1091
- # ----------
1092
- # table_name : str
1093
- # Name of the table to analyze
1094
-
1095
- # Returns
1096
- # -------
1097
- # Dict[str, int]
1098
- # Dictionary containing:
1099
- # - pages: Number of pages used by table
1100
- # - page_size: Size of each page in bytes
1101
- # - total_size: Total size in bytes (pages * page_size)
1102
- # - row_count: Total number of rows in table
1103
-
1104
- # Raises
1105
- # ------
1106
- # ValueError
1107
- # If size statistics cannot be retrieved
1108
- # sqlite3.Error
1109
- # If database query fails
1110
- # """
1111
- # try:
1112
- # pages = self.cursor.execute(f"PRAGMA page_count").fetchone()[0]
1113
- # page_size = self.cursor.execute(f"PRAGMA page_size").fetchone()[0]
1114
- # row_count = self.get_row_count(table_name)
1115
- # return {
1116
- # "pages": pages,
1117
- # "page_size": page_size,
1118
- # "total_size": pages * page_size,
1119
- # "row_count": row_count,
1120
- # }
1121
- # except sqlite3.Error as err:
1122
- # raise ValueError(f"Failed to get table size: {err}")
1123
-
1124
- # # ----------------------------------------
1125
- # # Row Operations
1126
- # # ----------------------------------------
1127
- # def get_rows(
1128
- # self,
1129
- # table_name: str,
1130
- # columns: List[str] = None,
1131
- # where: str = None,
1132
- # order_by: str = None,
1133
- # limit: Optional[int] = None,
1134
- # offset: Optional[int] = None,
1135
- # return_as: str = "dataframe",
1136
- # ):
1137
- # """Retrieves rows from specified table."""
1138
-
1139
- # if columns is None:
1140
- # columns_str = "*"
1141
- # elif isinstance(columns, str):
1142
- # columns_str = f'"{columns}"'
1143
- # else:
1144
- # columns_str = ", ".join(f'"{col}"' for col in columns)
1145
-
1146
- # try:
1147
- # query_parts = [f"SELECT {columns_str} FROM {table_name}"]
1148
-
1149
- # if where:
1150
- # query_parts.append(f"WHERE {where}")
1151
- # if order_by:
1152
- # query_parts.append(f"ORDER BY {order_by}")
1153
- # if limit is not None:
1154
- # query_parts.append(f"LIMIT {limit}")
1155
- # if offset is not None:
1156
- # query_parts.append(f"OFFSET {offset}")
1157
-
1158
- # query = " ".join(query_parts)
1159
- # self.cursor.execute(query)
1160
-
1161
- # column_names = [
1162
- # description[0] for description in self.cursor.description
1163
- # ]
1164
- # data = self.cursor.fetchall()
1165
-
1166
- # if return_as == "list":
1167
- # return data
1168
- # elif return_as == "dict":
1169
- # return [dict(zip(column_names, row)) for row in data]
1170
- # else:
1171
- # return pd.DataFrame(data, columns=column_names)
1172
-
1173
- # except sqlite3.Error as error:
1174
- # raise sqlite3.Error(
1175
- # f"Query execution failed: {str(error)}"
1176
- # ) from error
1177
-
1178
- # def get_row_count(self, table_name: str = None, where: str = None) -> int:
1179
- # """Counts total number of rows in specified table.
1180
-
1181
- # Example
1182
- # -------
1183
- # >>> db = BaseSQLiteDB('example.db')
1184
- # >>> # Count all rows
1185
- # >>> total = db.get_row_count('users')
1186
- # >>> # Count with condition
1187
- # >>> active_users = db.get_row_count('users', where='status="active"')
1188
-
1189
- # Parameters
1190
- # ----------
1191
- # table_name : str
1192
- # Name of target table
1193
- # where : str, optional
1194
- # SQL WHERE clause for filtering rows
1195
-
1196
- # Returns
1197
- # -------
1198
- # int
1199
- # Number of rows matching criteria
1200
-
1201
- # Raises
1202
- # ------
1203
- # ValueError
1204
- # If table_name is not specified
1205
- # sqlite3.Error
1206
- # If query execution fails
1207
- # """
1208
-
1209
- # if table_name is None:
1210
- # raise ValueError("Table name must be specified")
1211
-
1212
- # query = f"SELECT COUNT(*) FROM {table_name}"
1213
- # if where:
1214
- # query += f" WHERE {where}"
1215
-
1216
- # self.cursor.execute(query)
1217
- # return self.cursor.fetchone()[0]
1218
-
1219
- # # ----------------------------------------
1220
- # # Batch Operations
1221
- # # ----------------------------------------
1222
- # def _run_many(
1223
- # self,
1224
- # sql_command,
1225
- # table_name: str,
1226
- # rows: List[Dict[str, Any]],
1227
- # batch_size: int = 1000,
1228
- # inherit_foreign: bool = True,
1229
- # where: Optional[str] = None,
1230
- # columns: Optional[List[str]] = None,
1231
- # ) -> None:
1232
- # assert sql_command.upper() in [
1233
- # "INSERT",
1234
- # "REPLACE",
1235
- # "INSERT OR REPLACE",
1236
- # "UPDATE",
1237
- # ]
1238
-
1239
- # if not rows:
1240
- # return
1241
-
1242
- # if sql_command.upper() == "UPDATE":
1243
- # valid_columns = (
1244
- # columns if columns else [col for col in rows[0].keys()]
1245
- # )
1246
- # set_clause = ",".join([f"{col}=?" for col in valid_columns])
1247
- # where_clause = where if where else "1=1"
1248
- # query = (
1249
- # f"UPDATE {table_name} SET {set_clause} WHERE {where_clause}"
1250
- # )
1251
-
1252
- # for idx in range(0, len(rows), batch_size):
1253
- # batch = rows[idx : idx + batch_size]
1254
- # values = [
1255
- # tuple([row[col] for col in valid_columns]) for row in batch
1256
- # ]
1257
- # self.executemany(query, values)
1258
- # return
1259
-
1260
- # # Filter rows based on where clause if provided
1261
- # if where:
1262
- # filtered_rows = []
1263
- # for row in rows:
1264
- # try:
1265
- # # Create a temporary query to test the where clause
1266
- # test_query = f"SELECT 1 FROM (SELECT {','.join(f'{k} as {k}' for k in row.keys())}) WHERE {where}"
1267
- # values = tuple(row.values())
1268
- # result = self.execute(test_query, values).fetchone()
1269
- # if result:
1270
- # filtered_rows.append(row)
1271
- # except Exception as e:
1272
- # print(
1273
- # f"Warning: Where clause evaluation failed for row: {e}"
1274
- # )
1275
- # rows = filtered_rows
1276
-
1277
- # # Rest of the original function...
1278
- # schema = self.get_table_schema(table_name)
1279
- # table_columns = set(schema["name"])
1280
- # valid_columns = [col for col in rows[0].keys()]
1281
-
1282
- # if inherit_foreign:
1283
- # fk_query = f"PRAGMA foreign_key_list({table_name})"
1284
- # foreign_keys = self.execute(fk_query).fetchall()
1285
-
1286
- # for row in rows:
1287
- # for fk in foreign_keys:
1288
- # ref_table, from_col, to_col = fk[2], fk[3], fk[4]
1289
- # if from_col not in row or row[from_col] is None:
1290
- # if to_col in row:
1291
- # query = f"SELECT {from_col} FROM {ref_table} WHERE {to_col} = ?"
1292
- # result = self.execute(
1293
- # query, (row[to_col],)
1294
- # ).fetchone()
1295
- # if result:
1296
- # row[from_col] = result[0]
1297
-
1298
- # columns = valid_columns
1299
- # placeholders = ",".join(["?" for _ in columns])
1300
- # query = f"{sql_command} INTO {table_name} ({','.join(columns)}) VALUES ({placeholders})"
1301
-
1302
- # for idx in range(0, len(rows), batch_size):
1303
- # batch = rows[idx : idx + batch_size]
1304
- # values = [[row.get(col) for col in valid_columns] for row in batch]
1305
- # self.executemany(query, values)
1306
-
1307
- # def update_many(
1308
- # self,
1309
- # table_name: str,
1310
- # rows: List[Dict[str, Any]],
1311
- # batch_size: int = 1000,
1312
- # where: Optional[str] = None,
1313
- # columns: Optional[List[str]] = None,
1314
- # ) -> None:
1315
- # with self.transaction():
1316
- # self._run_many(
1317
- # sql_command="UPDATE",
1318
- # table_name=table_name,
1319
- # rows=rows,
1320
- # batch_size=batch_size,
1321
- # inherit_foreign=False,
1322
- # where=where,
1323
- # columns=columns,
1324
- # )
1325
-
1326
- # def insert_many(
1327
- # self,
1328
- # table_name: str,
1329
- # rows: List[Dict[str, Any]],
1330
- # batch_size: int = 1000,
1331
- # inherit_foreign: bool = True,
1332
- # where: Optional[str] = None,
1333
- # ) -> None:
1334
- # with self.transaction():
1335
- # self._run_many(
1336
- # sql_command="INSERT",
1337
- # table_name=table_name,
1338
- # rows=rows,
1339
- # batch_size=batch_size,
1340
- # inherit_foreign=inherit_foreign,
1341
- # where=where,
1342
- # )
1343
-
1344
- # def replace_many(
1345
- # self,
1346
- # table_name: str,
1347
- # rows: List[Dict[str, Any]],
1348
- # batch_size: int = 1000,
1349
- # inherit_foreign: bool = True,
1350
- # where: Optional[str] = None,
1351
- # ) -> None:
1352
- # with self.transaction():
1353
- # self._run_many(
1354
- # sql_command="REPLACE",
1355
- # table_name=table_name,
1356
- # rows=rows,
1357
- # batch_size=batch_size,
1358
- # inherit_foreign=inherit_foreign,
1359
- # where=where,
1360
- # )
1361
-
1362
- # def delete_where(
1363
- # self, table_name: str, where: str, limit: Optional[int] = None
1364
- # ) -> None:
1365
- # with self.transaction():
1366
- # query = f"DELETE FROM {table_name} WHERE {where}"
1367
- # if limit is not None:
1368
- # query += f" LIMIT {limit}"
1369
- # self.execute(query)
1370
-
1371
- # def update_where(
1372
- # self,
1373
- # table_name: str,
1374
- # updates: Dict[str, Any],
1375
- # where: str,
1376
- # limit: Optional[int] = None,
1377
- # ) -> None:
1378
- # with self.transaction():
1379
- # set_clause = ", ".join([f"{col} = ?" for col in updates.keys()])
1380
- # query = f"UPDATE {table_name} SET {set_clause} WHERE {where}"
1381
- # if limit is not None:
1382
- # query += f" LIMIT {limit}"
1383
- # self.execute(query, tuple(updates.values()))
1384
-
1385
- # # def update_many(
1386
- # # self,
1387
- # # table_name: str,
1388
- # # rows: List[Dict[str, Any]],
1389
- # # batch_size: int = 1000,
1390
- # # where: Optional[str] = None,
1391
- # # columns: Optional[List[str]] = None,
1392
- # # ) -> None:
1393
- # # self._run_many(
1394
- # # sql_command="UPDATE",
1395
- # # table_name=table_name,
1396
- # # rows=rows,
1397
- # # batch_size=batch_size,
1398
- # # inherit_foreign=False,
1399
- # # where=where,
1400
- # # columns=columns,
1401
- # # )
1402
-
1403
- # # def insert_many(
1404
- # # self,
1405
- # # table_name: str,
1406
- # # rows: List[Dict[str, Any]],
1407
- # # batch_size: int = 1000,
1408
- # # inherit_foreign: bool = True,
1409
- # # where: Optional[str] = None,
1410
- # # ) -> None:
1411
- # # self._run_many(
1412
- # # sql_command="INSERT",
1413
- # # table_name=table_name,
1414
- # # rows=rows,
1415
- # # batch_size=batch_size,
1416
- # # inherit_foreign=inherit_foreign,
1417
- # # where=where,
1418
- # # )
1419
-
1420
- # # def replace_many(
1421
- # # self,
1422
- # # table_name: str,
1423
- # # rows: List[Dict[str, Any]],
1424
- # # batch_size: int = 1000,
1425
- # # inherit_foreign: bool = True,
1426
- # # where: Optional[str] = None,
1427
- # # ) -> None:
1428
- # # self._run_many(
1429
- # # sql_command="REPLACE",
1430
- # # table_name=table_name,
1431
- # # rows=rows,
1432
- # # batch_size=batch_size,
1433
- # # inherit_foreign=inherit_foreign,
1434
- # # where=where,
1435
- # # )
1436
-
1437
- # # def delete_where(
1438
- # # self, table_name: str, where: str, limit: Optional[int] = None
1439
- # # ) -> None:
1440
- # # """Deletes rows matching condition with optional limit.
1441
-
1442
- # # Example
1443
- # # -------
1444
- # # >>> db.delete_where('users', "age < 18")
1445
- # # >>> db.delete_where('logs', "timestamp < '2024-01-01'", limit=1000)
1446
-
1447
- # # Parameters
1448
- # # ----------
1449
- # # table_name : str
1450
- # # Name of target table
1451
- # # where : str
1452
- # # SQL WHERE clause for filtering rows to delete
1453
- # # limit : Optional[int], optional
1454
- # # Maximum number of rows to delete in single operation
1455
-
1456
- # # Raises
1457
- # # ------
1458
- # # ValueError
1459
- # # If deletion fails
1460
- # # sqlite3.Error
1461
- # # If SQL execution fails
1462
- # # """
1463
- # # try:
1464
- # # query = f"DELETE FROM {table_name} WHERE {where}"
1465
- # # if limit is not None:
1466
- # # query += f" LIMIT {limit}"
1467
- # # self.execute(query)
1468
- # # except sqlite3.Error as err:
1469
- # # raise ValueError(f"Failed to delete rows: {err}")
1470
-
1471
- # # def update_where(
1472
- # # self,
1473
- # # table_name: str,
1474
- # # updates: Dict[str, Any],
1475
- # # where: str,
1476
- # # limit: Optional[int] = None,
1477
- # # ) -> None:
1478
- # # """Updates rows matching condition with optional limit.
1479
-
1480
- # # Example
1481
- # # -------
1482
- # # >>> db.update_where(
1483
- # # ... 'users',
1484
- # # ... {'status': 'active', 'last_login': '2024-01-01'},
1485
- # # ... "age >= 18",
1486
- # # ... limit=1000
1487
- # # ... )
1488
-
1489
- # # Parameters
1490
- # # ----------
1491
- # # table_name : str
1492
- # # Name of target table
1493
- # # updates : Dict[str, Any]
1494
- # # Dictionary of column names and new values
1495
- # # where : str
1496
- # # SQL WHERE clause for filtering rows to update
1497
- # # limit : Optional[int], optional
1498
- # # Maximum number of rows to update in single operation
1499
-
1500
- # # Raises
1501
- # # ------
1502
- # # ValueError
1503
- # # If update fails
1504
- # # sqlite3.Error
1505
- # # If SQL execution fails
1506
- # # """
1507
- # # try:
1508
- # # set_clause = ", ".join([f"{col} = ?" for col in updates.keys()])
1509
- # # query = f"UPDATE {table_name} SET {set_clause} WHERE {where}"
1510
- # # if limit is not None:
1511
- # # query += f" LIMIT {limit}"
1512
- # # self.execute(query, tuple(updates.values()))
1513
- # # except sqlite3.Error as err:
1514
- # # raise ValueError(f"Failed to update rows: {err}")
1515
-
1516
- # # ----------------------------------------
1517
- # # BLOB Operations
1518
- # # ----------------------------------------
1519
- # def save_array(
1520
- # self,
1521
- # table_name: str,
1522
- # data: np.ndarray,
1523
- # column: str = "data",
1524
- # ids: Optional[Union[int, List[int]]] = None,
1525
- # where: str = None,
1526
- # additional_columns: Dict[str, Any] = None,
1527
- # batch_size: int = 1000,
1528
- # ) -> None:
1529
- # """Stores NumPy array as BLOB with metadata in SQLite database."""
1530
- # with self.transaction():
1531
- # if not isinstance(data, (np.ndarray, list)):
1532
- # raise ValueError("Input must be a NumPy array or list of arrays")
1533
-
1534
- # try:
1535
- # if ids is not None:
1536
- # if isinstance(ids, int):
1537
- # ids = [ids]
1538
- # data = [data]
1539
- # if len(ids) != len(data):
1540
- # raise ValueError(
1541
- # "Length of ids must match number of arrays"
1542
- # )
1543
-
1544
- # for id_, arr in zip(ids, data):
1545
- # if not isinstance(arr, np.ndarray):
1546
- # raise ValueError(
1547
- # f"Element for id {id_} must be a NumPy array"
1548
- # )
1549
-
1550
- # binary = arr.tobytes()
1551
- # columns = [column, f"{column}_dtype", f"{column}_shape"]
1552
- # values = [binary, str(arr.dtype), str(arr.shape)]
1553
-
1554
- # if additional_columns:
1555
- # columns = list(additional_columns.keys()) + columns
1556
- # values = list(additional_columns.values()) + values
1557
-
1558
- # update_cols = [f"{col}=?" for col in columns]
1559
- # query = f"UPDATE {table_name} SET {','.join(update_cols)} WHERE id=?"
1560
- # values.append(id_)
1561
- # self.execute(query, tuple(values))
1562
-
1563
- # else:
1564
- # if not isinstance(data, np.ndarray):
1565
- # raise ValueError("Single input must be a NumPy array")
1566
-
1567
- # binary = data.tobytes()
1568
- # columns = [column, f"{column}_dtype", f"{column}_shape"]
1569
- # values = [binary, str(data.dtype), str(data.shape)]
1570
-
1571
- # if additional_columns:
1572
- # columns = list(additional_columns.keys()) + columns
1573
- # values = list(additional_columns.values()) + values
1574
-
1575
- # if where is not None:
1576
- # update_cols = [f"{col}=?" for col in columns]
1577
- # query = f"UPDATE {table_name} SET {','.join(update_cols)} WHERE {where}"
1578
- # self.execute(query, tuple(values))
1579
- # else:
1580
- # placeholders = ",".join(["?" for _ in columns])
1581
- # columns_str = ",".join(columns)
1582
- # query = f"INSERT INTO {table_name} ({columns_str}) VALUES ({placeholders})"
1583
- # self.execute(query, tuple(values))
1584
-
1585
- # except Exception as err:
1586
- # raise ValueError(f"Failed to save array: {err}")
1587
-
1588
- # def load_array(
1589
- # self,
1590
- # table_name: str,
1591
- # column: str,
1592
- # ids: Union[int, List[int], str] = "all",
1593
- # where: str = None,
1594
- # order_by: str = None,
1595
- # batch_size: int = 128,
1596
- # dtype: np.dtype = None,
1597
- # shape: Optional[Tuple] = None,
1598
- # ) -> Optional[np.ndarray]:
1599
- # """Loads array data from SQLite database with metadata support."""
1600
- # try:
1601
- # if ids == "all":
1602
- # query = f"SELECT id FROM {table_name}"
1603
- # if where:
1604
- # query += f" WHERE {where}"
1605
- # self.cursor.execute(query)
1606
- # ids = [row[0] for row in self.cursor.fetchall()]
1607
- # elif isinstance(ids, int):
1608
- # ids = [ids]
1609
-
1610
- # # Create mapping of id to data
1611
- # id_to_data = {}
1612
- # unique_ids = list(set(ids))
1613
-
1614
- # for idx in range(0, len(unique_ids), batch_size):
1615
- # batch_ids = unique_ids[idx : idx + batch_size]
1616
- # placeholders = ",".join("?" for _ in batch_ids)
1617
-
1618
- # try:
1619
- # query = f"""
1620
- # SELECT id, {column},
1621
- # {column}_dtype,
1622
- # {column}_shape
1623
- # FROM {table_name}
1624
- # WHERE id IN ({placeholders})
1625
- # """
1626
- # self.cursor.execute(query, tuple(batch_ids))
1627
- # has_metadata = True
1628
- # except sqlite3.OperationalError:
1629
- # query = f"SELECT id, {column} FROM {table_name} WHERE id IN ({placeholders})"
1630
- # self.cursor.execute(query, tuple(batch_ids))
1631
- # has_metadata = False
1632
-
1633
- # if where:
1634
- # query += f" AND {where}"
1635
- # if order_by:
1636
- # query += f" ORDER BY {order_by}"
1637
-
1638
- # results = self.cursor.fetchall()
1639
- # if results:
1640
- # for result in results:
1641
- # if has_metadata:
1642
- # id_val, blob, dtype_str, shape_str = result
1643
- # data = np.frombuffer(
1644
- # blob, dtype=np.dtype(dtype_str)
1645
- # ).reshape(eval(shape_str))
1646
- # else:
1647
- # id_val, blob = result
1648
- # data = (
1649
- # np.frombuffer(blob, dtype=dtype)
1650
- # if dtype
1651
- # else np.frombuffer(blob)
1652
- # )
1653
- # if shape:
1654
- # data = data.reshape(shape)
1655
- # id_to_data[id_val] = data
1656
-
1657
- # # Maintain input order and duplicates
1658
- # all_data = [
1659
- # id_to_data[id_val] for id_val in ids if id_val in id_to_data
1660
- # ]
1661
- # return np.stack(all_data, axis=0) if all_data else None
1662
-
1663
- # except Exception as err:
1664
- # raise ValueError(f"Failed to load array: {err}")
1665
-
1666
- # def binary_to_array(
1667
- # self,
1668
- # binary_data,
1669
- # dtype_str=None,
1670
- # shape_str=None,
1671
- # dtype=None,
1672
- # shape=None,
1673
- # ):
1674
- # """Convert binary data into numpy array."""
1675
- # if binary_data is None:
1676
- # return None
1677
-
1678
- # if dtype_str and shape_str:
1679
- # return np.frombuffer(
1680
- # binary_data, dtype=np.dtype(dtype_str)
1681
- # ).reshape(eval(shape_str))
1682
- # elif dtype and shape:
1683
- # return np.frombuffer(binary_data, dtype=dtype).reshape(shape)
1684
- # return binary_data
1685
-
1686
- # def get_array_dict(self, df, columns=None, dtype=None, shape=None):
1687
- # """Return dictionary of concatenated arrays for batch processing.
1688
-
1689
- # Returns:
1690
- # dict: {column_name: numpy_array} where each array has shape (n_samples, *data_shape)
1691
- # """
1692
- # result = {}
1693
- # if columns is None:
1694
- # columns = [
1695
- # col
1696
- # for col in df.columns
1697
- # if not (col.endswith("_dtype") or col.endswith("_shape"))
1698
- # ]
1699
-
1700
- # for col in columns:
1701
- # if f"{col}_dtype" in df.columns and f"{col}_shape" in df.columns:
1702
- # arrays = [
1703
- # self.binary_to_array(
1704
- # row[col], row[f"{col}_dtype"], row[f"{col}_shape"]
1705
- # )
1706
- # for _, row in df.iterrows()
1707
- # ]
1708
- # elif dtype and shape:
1709
- # arrays = [
1710
- # self.binary_to_array(x, dtype=dtype, shape=shape)
1711
- # for x in df[col]
1712
- # ]
1713
- # result[col] = np.stack(arrays)
1714
-
1715
- # return result
1716
-
1717
- # def decode_array_columns(self, df, columns=None, dtype=None, shape=None):
1718
- # """Decode binary columns to numpy arrays within DataFrame for exploration.
1719
-
1720
- # Modifies DataFrame in-place, replacing binary data with numpy arrays.
1721
- # Returns modified DataFrame.
1722
- # """
1723
- # if columns is None:
1724
- # columns = [
1725
- # col
1726
- # for col in df.columns
1727
- # if not (col.endswith("_dtype") or col.endswith("_shape"))
1728
- # ]
1729
-
1730
- # for col in columns:
1731
- # if f"{col}_dtype" in df.columns and f"{col}_shape" in df.columns:
1732
- # df[col] = df.apply(
1733
- # lambda row: self.binary_to_array(
1734
- # row[col], row[f"{col}_dtype"], row[f"{col}_shape"]
1735
- # ),
1736
- # axis=1,
1737
- # )
1738
- # elif dtype and shape:
1739
- # df[col] = df[col].apply(
1740
- # lambda x: self.binary_to_array(x, dtype=dtype, shape=shape)
1741
- # )
1742
- # return df
1743
-
1744
- # # ----------------------------------------
1745
- # # Import/Export Operations
1746
- # # ----------------------------------------
1747
- # def load_from_csv(
1748
- # self,
1749
- # table_name: str,
1750
- # csv_path: str,
1751
- # if_exists: str = "append",
1752
- # batch_size: int = 10_000,
1753
- # chunk_size: int = 100_000,
1754
- # ) -> None:
1755
- # """Imports CSV data into table with batch processing.
1756
-
1757
- # Example
1758
- # -------
1759
- # >>> db.load_from_csv(
1760
- # ... 'users',
1761
- # ... 'data.csv',
1762
- # ... if_exists='replace',
1763
- # ... batch_size=5000
1764
- # ... )
1765
-
1766
- # Parameters
1767
- # ----------
1768
- # table_name : str
1769
- # Name of target table
1770
- # csv_path : str
1771
- # Path to CSV file
1772
- # if_exists : str, optional
1773
- # How to behave if table exists: 'fail', 'replace', 'append' (default: 'append')
1774
- # batch_size : int, optional
1775
- # Number of rows per batch for SQL insert (default: 10,000)
1776
- # chunk_size : int, optional
1777
- # Number of rows to read at once from CSV (default: 100,000)
1778
-
1779
- # Raises
1780
- # ------
1781
- # ValueError
1782
- # If file or table operations fail
1783
- # FileNotFoundError
1784
- # If CSV file does not exist
1785
- # """
1786
- # with self.transaction():
1787
- # try:
1788
- # for chunk in pd.read_csv(csv_path, chunksize=chunk_size):
1789
- # chunk.to_sql(
1790
- # table_name,
1791
- # self.conn,
1792
- # if_exists=if_exists,
1793
- # index=False,
1794
- # chunksize=batch_size,
1795
- # )
1796
- # if_exists = "append"
1797
- # except FileNotFoundError:
1798
- # raise FileNotFoundError(f"CSV file not found: {csv_path}")
1799
- # except Exception as err:
1800
- # raise ValueError(f"Failed to import from CSV: {err}")
1801
-
1802
- # def save_to_csv(
1803
- # self,
1804
- # table_name: str,
1805
- # output_path: str,
1806
- # columns: List[str] = ["*"],
1807
- # where: str = None,
1808
- # batch_size: int = 10_000,
1809
- # ) -> None:
1810
- # """Exports table data to CSV file with batch processing.
1811
-
1812
- # Example
1813
- # -------
1814
- # >>> db.save_to_csv(
1815
- # ... 'users',
1816
- # ... 'output.csv',
1817
- # ... columns=['id', 'name'],
1818
- # ... where="age >= 18",
1819
- # ... batch_size=5000
1820
- # ... )
1821
-
1822
- # Parameters
1823
- # ----------
1824
- # table_name : str
1825
- # Name of source table
1826
- # output_path : str
1827
- # Path for output CSV file
1828
- # columns : List[str], optional
1829
- # Columns to export (default: ["*"])
1830
- # where : str, optional
1831
- # SQL WHERE clause for filtering rows
1832
- # batch_size : int, optional
1833
- # Number of rows per batch (default: 10,000)
1834
-
1835
- # Raises
1836
- # ------
1837
- # ValueError
1838
- # If export operation fails
1839
- # PermissionError
1840
- # If writing to output path fails
1841
- # """
1842
- # try:
1843
- # # First batch
1844
- # df = self.get_rows(
1845
- # columns=columns,
1846
- # table_name=table_name,
1847
- # where=where,
1848
- # limit=batch_size,
1849
- # offset=0,
1850
- # )
1851
- # df.to_csv(output_path, index=False, mode="w")
1852
-
1853
- # # Subsequent batches
1854
- # offset = batch_size
1855
- # while len(df) == batch_size:
1856
- # df = self.get_rows(
1857
- # columns=columns,
1858
- # table_name=table_name,
1859
- # where=where,
1860
- # limit=batch_size,
1861
- # offset=offset,
1862
- # )
1863
- # if len(df) > 0:
1864
- # df.to_csv(output_path, index=False, mode="a", header=False)
1865
- # offset += batch_size
1866
- # except PermissionError:
1867
- # raise PermissionError(f"Cannot write to: {output_path}")
1868
- # except Exception as err:
1869
- # raise ValueError(f"Failed to export to CSV: {err}")
1870
-
1871
- # # ----------------------------------------
1872
- # # Database Maintenance
1873
- # # ----------------------------------------
1874
- # @contextlib.contextmanager
1875
- # def maintenance_lock(self) -> ContextManager[None]:
1876
- # """Acquires maintenance lock for disruptive operations.
1877
-
1878
- # Example
1879
- # -------
1880
- # >>> with db.maintenance_lock():
1881
- # ... db.vacuum()
1882
- # ... db.optimize()
1883
- # """
1884
- # if not self._maintenance_lock.acquire(timeout=300):
1885
- # raise TimeoutError("Could not acquire maintenance lock")
1886
- # try:
1887
- # yield
1888
- # finally:
1889
- # self._maintenance_lock.release()
1890
-
1891
- # def backup(
1892
- # self,
1893
- # backup_path: str,
1894
- # pages: int = -1,
1895
- # progress: Optional[
1896
- # Callable[[sqlite3.Connection, int, int], None]
1897
- # ] = None,
1898
- # ) -> None:
1899
- # """Creates database backup with optional progress tracking.
1900
-
1901
- # Example
1902
- # -------
1903
- # >>> def show_progress(conn, remaining, total):
1904
- # ... print(f"Progress: {((total-remaining)/total)*100:.1f}%")
1905
- # >>> db.backup('backup.db', progress=show_progress)
1906
-
1907
- # Parameters
1908
- # ----------
1909
- # backup_path : str
1910
- # Path for backup database file
1911
- # pages : int, optional
1912
- # Number of pages to copy (-1 for all, default: -1)
1913
- # progress : Optional[Callable[[sqlite3.Connection, int, int], None]], optional
1914
- # Callback function for progress updates
1915
-
1916
- # Raises
1917
- # ------
1918
- # ValueError
1919
- # If backup fails
1920
- # sqlite3.Error
1921
- # If database operation fails
1922
- # """
1923
- # with self.maintenance_lock():
1924
- # try:
1925
-
1926
- # def _progress(
1927
- # status: sqlite3.Connection, remaining: int, total: int
1928
- # ) -> None:
1929
- # if progress:
1930
- # progress(total - remaining, total)
1931
-
1932
- # backup_conn = sqlite3.connect(backup_path)
1933
- # with backup_conn:
1934
- # self.conn.backup(
1935
- # backup_conn, pages=pages, progress=_progress
1936
- # )
1937
- # backup_conn.close()
1938
- # except (sqlite3.Error, Exception) as err:
1939
- # raise ValueError(f"Failed to create backup: {err}")
1940
-
1941
- # def vacuum(self, into: Optional[str] = None) -> None:
1942
- # """Rebuilds database file to reclaim unused space.
1943
-
1944
- # Example
1945
- # -------
1946
- # >>> db.vacuum() # Regular vacuum
1947
- # >>> db.vacuum(into='optimized.db') # Vacuum into new file
1948
-
1949
- # Parameters
1950
- # ----------
1951
- # into : Optional[str], optional
1952
- # Path to new database file for vacuum (default: None)
1953
-
1954
- # Raises
1955
- # ------
1956
- # sqlite3.Error
1957
- # If vacuum operation fails
1958
- # ValueError
1959
- # If target path is invalid
1960
- # """
1961
- # with self.maintenance_lock():
1962
- # try:
1963
- # if into:
1964
- # self.execute(f"VACUUM INTO '{into}'")
1965
- # else:
1966
- # self.execute("VACUUM")
1967
- # except sqlite3.Error as err:
1968
- # raise ValueError(f"Vacuum operation failed: {err}")
1969
-
1970
- # def optimize(self, analyze: bool = True) -> None:
1971
- # """Optimizes database performance with optional analysis.
1972
-
1973
- # Example
1974
- # -------
1975
- # >>> db.optimize() # Full optimization
1976
- # >>> db.optimize(analyze=False) # Skip analysis phase
1977
-
1978
- # Parameters
1979
- # ----------
1980
- # analyze : bool, optional
1981
- # Whether to run ANALYZE after optimization (default: True)
1982
-
1983
- # Raises
1984
- # ------
1985
- # ValueError
1986
- # If optimization fails
1987
- # sqlite3.Error
1988
- # If database operations fail
1989
- # """
1990
- # with self.maintenance_lock():
1991
- # try:
1992
- # self.execute("PRAGMA optimize")
1993
- # self.vacuum()
1994
- # if analyze:
1995
- # self.execute("ANALYZE")
1996
- # except sqlite3.Error as err:
1997
- # raise ValueError(f"Failed to optimize database: {err}")
1998
-
1999
- # # # without non-null count
2000
- # # def get_database_size(self, format: str = "bytes") -> Union[int, str]:
2001
- # # """Gets database file size in specified format.
2002
-
2003
- # # Example
2004
- # # -------
2005
- # # >>> size_bytes = db.get_database_size()
2006
- # # >>> size_mb = db.get_database_size(format='mb')
2007
- # # >>> print(f"Database size: {size_mb} MB")
2008
-
2009
- # # Parameters
2010
- # # ----------
2011
- # # format : str, optional
2012
- # # Output format: 'bytes', 'kb', 'mb', 'gb' (default: 'bytes')
2013
-
2014
- # # Returns
2015
- # # -------
2016
- # # Union[int, str]
2017
- # # File size in requested format
2018
-
2019
- # # Raises
2020
- # # ------
2021
- # # FileNotFoundError
2022
- # # If database file doesn't exist
2023
- # # ValueError
2024
- # # If format is invalid
2025
- # # """
2026
- # # if not os.path.exists(self.db_path):
2027
- # # raise FileNotFoundError(f"Database file not found: {self.db_path}")
2028
-
2029
- # # size_bytes = os.path.getsize(self.db_path)
2030
-
2031
- # # format_map = {
2032
- # # "bytes": lambda x: x,
2033
- # # "kb": lambda x: f"{x / 1024:.2f} KB",
2034
- # # "mb": lambda x: f"{x / (1024 * 1024):.2f} MB",
2035
- # # "gb": lambda x: f"{x / (1024 * 1024 * 1024):.2f} GB",
2036
- # # }
2037
-
2038
- # # if format.lower() not in format_map:
2039
- # # raise ValueError(
2040
- # # f"Invalid format. Choose from: {list(format_map.keys())}"
2041
- # # )
2042
-
2043
- # # return format_map[format.lower()](size_bytes)
2044
-
2045
- # def get_summaries(
2046
- # self,
2047
- # table_names: Optional[List[str]] = None,
2048
- # verbose: bool = True,
2049
- # limit: int = 5,
2050
- # ) -> Dict[str, pd.DataFrame]:
2051
-
2052
- # # Handling table_names
2053
- # if table_names is None:
2054
- # table_names = self.get_table_names()
2055
- # if isinstance(table_names, str):
2056
- # table_names = [table_names]
2057
-
2058
- # sample_tables = {}
2059
- # for table_name in table_names:
2060
- # columns = self.get_table_schema(table_name)
2061
- # table_sample = self.get_rows(table_name=table_name, limit=limit)
2062
-
2063
- # for column in table_sample.columns:
2064
- # print(column)
2065
- # if table_sample[column].dtype == object:
2066
-
2067
- # try:
2068
- # # Try parsing as datetime
2069
- # pd.to_datetime(table_sample[column])
2070
- # continue
2071
- # except:
2072
- # pass
2073
-
2074
- # # Check if all values are strings
2075
- # if (
2076
- # table_sample[column]
2077
- # .apply(lambda x: isinstance(x, str))
2078
- # .all()
2079
- # ):
2080
- # continue
2081
-
2082
- # sample_tables[table_name] = table_sample
2083
-
2084
- # return sample_tables
2085
-
2086
- # # def print_summary(
2087
- # # self,
2088
- # # table_names: Optional[List[str]] = None,
2089
- # # verbose: bool = True,
2090
- # # limit: int = 5,
2091
- # # ):
2092
- # # """Prints a friendly summary of all tables in the database."""
2093
- # # with pd.option_context(
2094
- # # "display.max_columns",
2095
- # # None,
2096
- # # "display.width",
2097
- # # None,
2098
- # # "display.max_colwidth",
2099
- # # None,
2100
- # # ):
2101
-
2102
- # # summaries = self.get_summaries(
2103
- # # table_names=table_names, verbose=verbose, limit=limit
2104
- # # )
2105
-
2106
- # # print("\n=== Database Summary ===")
2107
- # # for table_name, df_sample in summaries.items():
2108
- # # print("-" * (len(table_name) + 7))
2109
- # # print(f"Table: {table_name}")
2110
- # # print("-" * (len(table_name) + 7))
2111
- # # if df_sample.empty:
2112
- # # print("Empty table")
2113
- # # else:
2114
- # # # Get full table for accurate counts
2115
- # # with self.lock:
2116
- # # full_df = pd.read_sql_query(
2117
- # # f"SELECT * FROM {table_name}", self.conn
2118
- # # )
2119
- # # print(f"\nSample rows ({len(df_sample)} shown):\n")
2120
- # # dtype_df = pd.DataFrame(
2121
- # # [df_sample.dtypes], index=["dtype"]
2122
- # # )
2123
- # # non_null_counts = pd.DataFrame(
2124
- # # [full_df.notna().sum()], index=["non-null count"]
2125
- # # )
2126
- # # print(pd.concat([df_sample, dtype_df, non_null_counts]))
2127
-
2128
- # # print()
2129
-
2130
- # @property
2131
- # def summary(self):
2132
- # self()
2133
-
2134
- #
2135
-
2136
- # EOF