scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,376 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-13 08:18:35 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ """Chrome PDF Viewer Download Strategy"""
15
+
16
+ from pathlib import Path
17
+ from typing import Optional
18
+
19
+ from playwright.async_api import BrowserContext
20
+
21
+ from scitex import logging
22
+ from scitex.browser.stealth import HumanBehavior
23
+ from scitex.scholar.browser import (
24
+ browser_logger,
25
+ click_center_async,
26
+ click_download_for_chrome_pdf_viewer_async,
27
+ detect_chrome_pdf_viewer_async,
28
+ show_grid_async,
29
+ )
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
async def _announce_saved_pdf_async(
    page,
    func_name: str,
    output_path: Path,
    headline: str,
    file_size_mb: float,
) -> None:
    """Emit the messages shared by every "PDF saved" outcome.

    Args:
        page: Page handed to ``browser_logger`` for the in-browser messages.
        func_name: Prefix used in every log message.
        output_path: Final location of the PDF.
        headline: Outcome-specific text placed after the check mark.
        file_size_mb: Size of the saved file (bytes / 1024**2).
    """
    await browser_logger.info(page, f"{func_name}: ✓ {headline}")
    await browser_logger.info(
        page,
        f"{func_name}: ✓ Saved to: {str(output_path)}",
    )
    logger.info(
        f"{func_name}: Downloaded PDF: {output_path} ({file_size_mb:.2f} MB)"
    )
    await page.wait_for_timeout(3000)  # Show info for 3s


async def try_download_chrome_pdf_viewer_async(
    context: BrowserContext,
    pdf_url: str,
    output_path: Path,
    func_name: str = "ScholarPDFDownloader",
) -> Optional[Path]:
    """Download a PDF through Chrome's built-in PDF viewer.

    Opens ``pdf_url`` in a new page, waits for the viewer to settle, clicks
    the viewer's download button and then looks for the result in two
    places: new files in the configured downloads directory (moved to
    ``output_path``) and ``output_path`` itself.

    Args:
        context: Playwright browser context used to open the page.
        pdf_url: URL expected to render in the Chrome PDF viewer.
        output_path: Destination of the PDF; coerced to ``Path``.
        func_name: Prefix used in every log message.

    Returns:
        ``output_path`` when a file larger than 1000 bytes ends up there,
        otherwise ``None``. Exceptions are logged and reported as ``None``.
    """
    # Anything at or below this size is treated as a failed download.
    min_valid_bytes = 1000
    page = None
    try:
        # Ensure output_path is Path object
        if not isinstance(output_path, Path):
            output_path = Path(output_path)

        logger.debug(f"{func_name}: Chrome PDF: Starting download")
        logger.debug(f" URL: {pdf_url} (type: {type(pdf_url)})")
        logger.debug(f" Output: {output_path} (type: {type(output_path)})")
        logger.debug(f" Downloader: {func_name} (type: {type(func_name)})")

        page = await context.new_page()

        # Get browser's download directory and capture files before download.
        # Imports stay local (and inside the try) so an import failure is
        # reported as a failed strategy instead of propagating.
        import shutil
        import time

        from scitex.scholar.config import ScholarConfig

        config = ScholarConfig()
        browser_downloads_dir = config.get_library_downloads_dir()
        files_before = (
            set(browser_downloads_dir.glob("*"))
            if browser_downloads_dir.exists()
            else set()
        )
        download_start_time = time.time()
        logger.info(
            f"{func_name}: Monitoring {browser_downloads_dir} ({len(files_before)} files)"
        )

        # Step 1: Navigate and wait for networkidle
        await browser_logger.debug(
            page, f"{func_name}: Chrome PDF: Navigating to URL..."
        )
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Navigating to {str(pdf_url)[:60]}...",
        )
        # Create HumanBehavior instance for delays
        human = HumanBehavior()
        await human.random_delay_async(1000, 2000, page=page)

        # Navigate and wait for initial networkidle
        await page.goto(str(pdf_url), wait_until="networkidle", timeout=60_000)
        await browser_logger.debug(
            page,
            f"{func_name}: Chrome PDF: Loaded page at {str(page.url)}",
        )
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Initial load at {str(page.url)[:80]}",
        )

        # Step 2: Wait for PDF rendering and any post-load network activity
        await browser_logger.debug(
            page,
            f"{func_name}: Chrome PDF: Waiting for PDF rendering...",
        )
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Waiting for PDF rendering (networkidle)...",
        )
        try:
            # Wait for network to be fully idle (catches post-load PDF requests)
            await page.wait_for_load_state("networkidle", timeout=30_000)
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: Network idle, PDF should be rendered",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: ✓ Network idle, PDF rendered",
            )
            await page.wait_for_timeout(2000)
        except Exception as e:
            # A busy network is not fatal; carry on after a short pause.
            await browser_logger.debug(
                page,
                f"{func_name}: Network idle timeout (non-fatal): {e}",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: Network still active, continuing anyway",
            )
            await page.wait_for_timeout(2000)

        # Step 2.5: Extra wait for PDF viewer iframe/embed to fully load
        # Chrome PDF viewer can take additional time to initialize
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Waiting extra for PDF viewer to initialize (10s)...",
        )
        await page.wait_for_timeout(10000)  # Additional 10 seconds

        # Step 3: Detect PDF viewer
        await browser_logger.debug(
            page, f"{func_name}: Chrome PDF: Detecting PDF viewer..."
        )
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: Detecting PDF viewer..."
        )
        if not await detect_chrome_pdf_viewer_async(page):
            await browser_logger.warning(
                page,
                f"{func_name}: Chrome PDF: No PDF viewer detected at {str(page.url)}",
            )
            await browser_logger.warning(
                page,
                f"{func_name}: Chrome PDF: ✗ No PDF viewer detected!",
            )
            await page.wait_for_timeout(2000)  # Show message for 2s
            return None

        # Step 4: PDF viewer detected!
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: PDF viewer detected, attempting download...",
        )
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: ✓ PDF viewer detected!"
        )

        # Wait for PDF to fully render for visual feedback (especially in interactive mode)
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Waiting for PDF to render (5s)...",
        )
        await page.wait_for_timeout(5000)  # 5 seconds for visual confirmation
        await human.random_delay_async(1000, 2000, page=page)

        # Step 5: Show grid and click center
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: Showing grid overlay..."
        )
        await show_grid_async(page)
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: Clicking center of PDF..."
        )
        await click_center_async(page)

        # Step 6: Click download button
        await browser_logger.debug(
            page, f"{func_name}: Chrome PDF: Clicking download button..."
        )
        await browser_logger.info(
            page, f"{func_name}: Chrome PDF: Clicking download button..."
        )
        is_downloaded = await click_download_for_chrome_pdf_viewer_async(
            page, output_path
        )

        # Step 7: Wait for download to complete (use networkidle for patience)
        await browser_logger.debug(
            page,
            f"{func_name}: Chrome PDF: Waiting for download to complete...",
        )
        await browser_logger.info(
            page,
            f"{func_name}: Chrome PDF: Waiting for download (networkidle up to 30s)...",
        )
        try:
            # Wait for any download-related network activity to complete
            await page.wait_for_load_state("networkidle", timeout=30_000)
            await browser_logger.debug(
                page,
                f"{func_name}: Chrome PDF: Network idle after download click",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: ✓ Download network activity complete",
            )
            await page.wait_for_timeout(2000)
        except Exception as e:
            await browser_logger.debug(
                page,
                f"{func_name}: Download networkidle timeout (non-fatal): {e}",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Chrome PDF: Network timeout, checking file...",
            )
            await page.wait_for_timeout(2000)

        # Step 8: Check if file was actually downloaded
        # Check browser download directory for new files (even if Playwright event didn't fire)
        files_after = (
            set(browser_downloads_dir.glob("*"))
            if browser_downloads_dir.exists()
            else set()
        )
        new_files = files_after - files_before
        download_duration = time.time() - download_start_time

        logger.info(f"{func_name}: Checking download result...")
        logger.info(
            f"{func_name}: is_downloaded (Playwright): {is_downloaded}"
        )
        logger.info(f"{func_name}: output_path: {output_path}")
        logger.info(f"{func_name}: Files before: {len(files_before)}")
        logger.info(f"{func_name}: Files after: {len(files_after)}")
        logger.info(f"{func_name}: New files: {len(new_files)}")

        # Only regular files can be the PDF; a new sub-directory must never
        # be moved to output_path.
        candidates = [entry for entry in new_files if entry.is_file()]
        if candidates:
            # Found new file(s) in download directory; take the newest one.
            downloaded_file = max(candidates, key=lambda p: p.stat().st_mtime)
            file_size = downloaded_file.stat().st_size
            file_size_mb = file_size / (1024 * 1024)

            logger.info(
                f"{func_name}: Found downloaded file: {downloaded_file.name}"
            )
            logger.info(f"{func_name}: Size: {file_size_mb:.2f} MB")
            logger.info(f"{func_name}: Duration: {download_duration:.1f}s")
            logger.info(f"{func_name}: Location: {downloaded_file}")

            if file_size > min_valid_bytes:
                # Rename to desired output filename
                output_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(str(downloaded_file), str(output_path))
                await _announce_saved_pdf_async(
                    page,
                    func_name,
                    output_path,
                    f"Downloaded {file_size_mb:.2f} MB in {download_duration:.1f}s",
                    file_size_mb,
                )
                return output_path

        if output_path.exists():
            # The file may be there even when no download was reported, so it
            # is checked regardless of is_downloaded.
            file_size = output_path.stat().st_size
            file_size_mb = file_size / (1024 * 1024)
            if file_size > min_valid_bytes:
                headline = (
                    f"Downloaded {file_size_mb:.2f} MB"
                    if is_downloaded
                    else f"File found: {file_size_mb:.2f} MB"
                )
                await _announce_saved_pdf_async(
                    page, func_name, output_path, headline, file_size_mb
                )
                return output_path
            if is_downloaded:
                await browser_logger.warning(
                    page,
                    f"{func_name}: ✗ File too small: {file_size} bytes",
                )
                logger.warning(
                    f"{func_name}: Download failed - file too small: {file_size} bytes"
                )
                await page.wait_for_timeout(2000)
                return None

        # Nothing usable was produced. Report failure even if a download was
        # signalled: reaching this point means no valid file is at output_path.
        await browser_logger.warning(
            page, f"{func_name}: ✗ Download did not complete"
        )
        logger.warning(
            f"{func_name}: Download did not complete (is_downloaded={is_downloaded}, exists={output_path.exists()})"
        )
        await page.wait_for_timeout(2000)
        await browser_logger.debug(
            page,
            f"{func_name}: Chrome PDF Viewer method did not work for {str(pdf_url)}",
        )
        return None

    except Exception as ee:
        # Log error safely without browser popup (avoids recursive errors)
        error_msg = f"{func_name}: Chrome PDF Viewer failed: {type(ee).__name__}: {str(ee)}"
        logger.error(error_msg)
        logger.debug(f" URL: {pdf_url}")
        logger.debug(f" Output: {output_path}")

        if page is not None:
            try:
                await browser_logger.info(
                    page,
                    f"{func_name}: Chrome PDF: ✗ EXCEPTION: {type(ee).__name__}",
                )
                await page.wait_for_timeout(2000)  # Show error for 2s
            except Exception as popup_error:
                logger.debug(
                    f"{func_name}: Could not show error popup: {popup_error}"
                )
        return None

    finally:
        # Single close point for every exit path. A failure to close must not
        # change the result that is already being returned.
        if page is not None:
            try:
                await page.close()
            except Exception as close_error:
                logger.debug(
                    f"{func_name}: Error closing page: {close_error}"
                )
375
+
376
+ # EOF
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-13 07:59:52 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/strategies/direct_download.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/pdf_download/strategies/direct_download.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+ """Direct Download Strategy"""
14
+
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ from playwright.async_api import BrowserContext
19
+
20
+ from scitex import logging
21
+ from scitex.scholar.browser import browser_logger
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
async def try_download_direct_async(
    context: BrowserContext,
    pdf_url: str,
    output_path: Path,
    func_name: str = "try_download_direct_async",
) -> Optional[Path]:
    """Handle direct download that triggers ERR_ABORTED.

    Navigates a new page to ``pdf_url``. When that navigation starts a
    download, the download is saved to ``output_path`` once it has finished.

    Args:
        context: Playwright browser context used to open the page.
        pdf_url: URL expected to start a file download.
        output_path: Destination of the PDF; coerced to ``Path``.
        func_name: Prefix used in every log message.

    Returns:
        ``output_path`` when a download was saved there, otherwise ``None``.
        Exceptions are logged and reported as ``None``.
    """
    import asyncio

    # Upper bound for finishing the transfer so a stalled download cannot
    # block the caller forever.
    save_timeout_sec = 60
    page = None
    try:
        # Same coercions as the Chrome PDF viewer strategy: callers may pass
        # plain strings or URL-like objects.
        output_path = Path(output_path)
        pdf_url = str(pdf_url)

        page = await context.new_page()
        await browser_logger.info(
            page, f"{func_name}: Trying direct download from {pdf_url}"
        )

        # Only record the download here and save it after navigation. Saving
        # inside the event handler would let the result check below run
        # before a slow transfer has finished.
        started_downloads = []
        page.on("download", started_downloads.append)

        # Step 1: Navigate
        await browser_logger.info(
            page,
            f"{func_name}: Direct Download: Navigating to {pdf_url[:60]}...",
        )
        try:
            await page.goto(pdf_url, wait_until="load", timeout=60_000)
            await browser_logger.info(
                page,
                f"{func_name}: Direct Download: Loaded at {page.url[:80]}",
            )
        except Exception as ee:
            if "ERR_ABORTED" in str(ee):
                # An aborted navigation is the expected signature of a URL
                # that starts a download instead of rendering a page.
                await browser_logger.info(
                    page,
                    f"{func_name}: Direct Download: ERR_ABORTED detected - likely direct download",
                )
                await browser_logger.info(
                    page,
                    f"{func_name}: Direct Download: ERR_ABORTED (download may have started)",
                )
                # Give the download event time to arrive.
                await page.wait_for_timeout(5_000)
            else:
                await browser_logger.info(
                    page,
                    f"{func_name}: Direct Download: ✗ Error: {str(ee)[:80]}",
                )
                await page.wait_for_timeout(2000)
                raise

        # Step 2: Save the download (waits for the transfer to finish)
        download_saved = False
        if started_downloads:
            await asyncio.wait_for(
                started_downloads[0].save_as(output_path),
                timeout=save_timeout_sec,
            )
            download_saved = True

        # Step 3: Check result
        if download_saved and output_path.exists():
            size_MiB = output_path.stat().st_size / 1024 / 1024
            await browser_logger.info(
                page,
                f"{func_name}: Direct download: from {pdf_url} to {output_path} ({size_MiB:.2f} MiB)",
            )
            await browser_logger.info(
                page,
                f"{func_name}: Direct Download: ✓ Downloaded {size_MiB:.2f} MB",
            )
            await page.wait_for_timeout(2000)
            return output_path

        await browser_logger.debug(
            page,
            f"{func_name}: Direct download: No download event occurred",
        )
        await browser_logger.info(
            page,
            f"{func_name}: Direct Download: ✗ No download event occurred",
        )
        await page.wait_for_timeout(2000)
        return None

    except Exception as ee:
        failure_msg = f"{func_name}: Direct download failed: {ee}"
        if page is None:
            # No page to show the message on; still leave a trace in the log.
            logger.warning(failure_msg)
        else:
            try:
                await browser_logger.warning(page, failure_msg)
                await browser_logger.info(
                    page,
                    f"{func_name}: Direct Download: ✗ EXCEPTION: {str(ee)[:100]}",
                )
                await page.wait_for_timeout(2000)
            except Exception as popup_error:
                # The page itself may be unusable; fall back to the module
                # logger so the original failure is not lost.
                logger.warning(failure_msg)
                logger.debug(
                    f"{func_name}: Could not show error popup: {popup_error}"
                )
        return None

    finally:
        # Single close point for every exit path.
        if page is not None:
            try:
                await page.close()
            except Exception as close_error:
                logger.debug(
                    f"{func_name}: Error closing page: {close_error}"
                )
130
+
131
+ # EOF
@@ -0,0 +1,167 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-13 08:00:08 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pdf_download/strategies/manual_download_fallback.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/pdf_download/strategies/manual_download_fallback.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+ """Manual Download Fallback Strategy"""
14
+
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ from playwright.async_api import BrowserContext
19
+
20
+ from scitex import logging
21
+ from scitex.scholar import ScholarConfig
22
+ from scitex.scholar.browser import browser_logger
23
+ from scitex.scholar.pdf_download.strategies.manual_download_utils import (
24
+ DownloadMonitorAndSync,
25
+ complete_manual_download_workflow_async,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
def _extract_doi_from_url(pdf_url: str) -> Optional[str]:
    """Return the DOI embedded in ``pdf_url``, or ``None`` if none is found."""
    import re

    if "doi.org/" in pdf_url:
        # Everything after the resolver host, minus query string and fragment.
        candidate = pdf_url.split("doi.org/")[-1].split("?")[0].split("#")[0]
        return candidate or None
    if "/doi/" in pdf_url:
        # Try to extract DOI from URL like /doi/10.1212/...
        match = re.search(r"/doi/(10\.\d+/[^\s?#]+)", pdf_url)
        if match:
            return match.group(1)
    return None


async def try_download_manual_async(
    context: BrowserContext,
    pdf_url: str,
    output_path: Path,
    func_name: str = "try_download_manual_async",
    config: Optional[ScholarConfig] = None,
    doi: Optional[str] = None,
) -> Optional[Path]:
    """Manual download fallback strategy.

    Opens PDF URL in browser, shows instructions, and monitors downloads directory.
    When user manually downloads the PDF, it automatically detects and organizes it.

    NOTE: This method should NOT check the _scitex_is_manual_mode flag because
    it IS the manual mode implementation!

    Args:
        context: Browser context
        pdf_url: URL of the PDF to download
        output_path: Where to save the final PDF; coerced to ``Path``
        func_name: Name for logging
        config: Scholar configuration; a default one is created when omitted
        doi: Optional DOI for filename generation; extracted from ``pdf_url``
            when not given

    Returns:
        Path to downloaded file, or None if failed
    """
    config = config or ScholarConfig()
    # Monitor for download (2 minutes timeout to prevent process accumulation)
    timeout_sec = 120
    page = None

    try:
        output_path = Path(output_path)

        # Create new page and navigate to PDF
        page = await context.new_page()

        await browser_logger.info(
            page,
            f"{func_name}: Opening PDF for manual download...",
        )

        await page.goto(pdf_url, timeout=30000, wait_until="domcontentloaded")

        await browser_logger.info(
            page,
            f"{func_name}: Please download the PDF manually from this page",
        )

        # Setup monitoring
        downloads_dir = config.get_library_downloads_dir()
        master_dir = config.get_library_master_dir()
        monitor = DownloadMonitorAndSync(downloads_dir, master_dir)

        # Progress logger
        def log_progress(msg: str):
            logger.info(f"{func_name}: {msg}")

        # Extract DOI from URL if not provided
        doi = doi or _extract_doi_from_url(pdf_url)

        # Show instructions and start monitoring
        log_progress(f"Monitoring {downloads_dir} for new PDFs...")
        log_progress("Please download the PDF manually from the browser")

        temp_file = await monitor.monitor_for_new_download_async(
            timeout_sec=timeout_sec,
            check_interval_sec=1.0,
            logger_func=log_progress,
        )

        if not temp_file:
            await browser_logger.error(
                page,
                f"{func_name}: No new PDF detected in {timeout_sec} seconds",
            )
            logger.error(f"{func_name}: Download monitoring timeout")
            return None

        await browser_logger.info(
            page,
            f"{func_name}: Detected: {temp_file.name} ({temp_file.stat().st_size / 1e6:.1f} MB)",
        )

        # Sync to library
        final_path = monitor.sync_to_final_destination(
            temp_file,
            doi=doi,
            url=pdf_url,
            content_type="main",
        )

        # Validate before touching final_path: the sync step may yield
        # nothing, and the attribute access below would then raise.
        if not final_path or not final_path.exists():
            logger.error(
                f"{func_name}: Sync to library did not produce a file"
            )
            return None

        await browser_logger.info(
            page,
            f"{func_name}: Synced to library: {final_path.name}",
        )

        # Copy to requested output path
        import shutil

        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(str(final_path), str(output_path))

        await browser_logger.info(
            page,
            f"{func_name}: Manual download complete!",
        )

        logger.info(f"{func_name}: Manual download saved to {output_path}")
        return output_path

    except Exception as e:
        logger.error(f"{func_name}: Manual download failed: {e}")
        if page is not None:
            try:
                await browser_logger.error(
                    page,
                    f"{func_name}: Error: {type(e).__name__}",
                )
            except Exception as popup_error:
                logger.debug(
                    f"{func_name}: Could not show error popup: {popup_error}"
                )
        return None

    finally:
        # Single close point for every exit path, so the page is released
        # even when reporting the error itself fails.
        if page is not None:
            try:
                await page.close()
            except Exception as close_error:
                logger.debug(
                    f"{func_name}: Error closing page: {close_error}"
                )
166
+
167
+ # EOF