scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704) hide show
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,256 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-11 01:19:48 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/url/strategies/find_pdf_urls_by_navigation.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/url/strategies/find_pdf_urls_by_navigation.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ """
15
+ Find PDF URLs by navigating to PDF links and following redirects.
16
+
17
+ Handles publishers like ScienceDirect that require navigation
18
+ through redirect chains to reach the actual PDF URL.
19
+ """
20
+
21
+ from typing import List, Optional
22
+ from urllib.parse import urljoin
23
+
24
+ from playwright.async_api import Page
25
+
26
+ from scitex.browser.debugging import browser_logger
27
+ from scitex.scholar import ScholarConfig
28
+ from scitex.scholar.browser.utils import wait_redirects
29
+
30
+
31
+ async def find_pdf_urls_by_navigation(
32
+ page: Page,
33
+ url: str = None,
34
+ config: ScholarConfig = None,
35
+ func_name: str = "find_pdf_urls_by_navigation",
36
+ ) -> List[str]:
37
+ """
38
+ Find PDF URLs by navigating to PDF links and capturing final URLs.
39
+
40
+ This handles cases like ScienceDirect where:
41
+ 1. Direct PDF links exist (/pdfft? endpoints)
42
+ 2. Navigating to them triggers redirects
43
+ 3. Final destination is the actual PDF on pdf.sciencedirectassets.com
44
+
45
+ Args:
46
+ page: Playwright page object
47
+ url: Current page URL (unused, for signature consistency)
48
+ config: ScholarConfig instance
49
+ func_name: Function name for logging
50
+
51
+ Returns:
52
+ List of PDF URLs found
53
+ """
54
+ config = config or ScholarConfig()
55
+ pdf_urls = []
56
+
57
+ try:
58
+ # Check if we already have direct PDF links
59
+ current_url = page.url.lower()
60
+ is_sciencedirect = any(
61
+ domain in current_url
62
+ for domain in [
63
+ "sciencedirect.com",
64
+ "cell.com",
65
+ "elsevier.com",
66
+ "ssrn.com",
67
+ ]
68
+ )
69
+
70
+ if not is_sciencedirect:
71
+ return []
72
+
73
+ # Look for existing PDF links on the page
74
+ pdf_link_selectors = [
75
+ 'a[href*="/pdfft?"]', # ScienceDirect PDF endpoint
76
+ 'a[aria-label*="Download PDF"]',
77
+ 'a[aria-label*="Download This Paper"]',
78
+ 'a:has-text("View PDF")',
79
+ "a.pdf-link",
80
+ 'a[href*="/pdf/"]',
81
+ ]
82
+
83
+ pdf_href = None
84
+ for selector in pdf_link_selectors:
85
+ try:
86
+ element = await page.query_selector(selector)
87
+ if element:
88
+ href = await element.get_attribute("href")
89
+ if href and ("/pdfft?" in href or "/pdf/" in href):
90
+ # Make absolute URL if needed
91
+ if href.startswith("/"):
92
+ href = urljoin(page.url, href)
93
+ pdf_href = href
94
+ await browser_logger.debug(
95
+ page,
96
+ f"{func_name}: Found PDF link: {href[:80]}...",
97
+ )
98
+ break
99
+ except Exception as e:
100
+ await browser_logger.debug(
101
+ page,
102
+ f"{func_name}: Error checking selector {selector}: {e}",
103
+ )
104
+ continue
105
+
106
+ if not pdf_href:
107
+ await browser_logger.debug(
108
+ page, f"{func_name}: No PDF links found on page"
109
+ )
110
+ return []
111
+
112
+ # Navigate to PDF URL in a new page to capture final URL
113
+ context = page.context
114
+ new_page = None
115
+
116
+ try:
117
+ await browser_logger.debug(
118
+ page,
119
+ f"{func_name}: Navigating to PDF URL to capture final destination...",
120
+ )
121
+ new_page = await context.new_page()
122
+
123
+ # Navigate and wait for redirects - be patient!
124
+ await new_page.goto(pdf_href, wait_until="commit", timeout=60000)
125
+
126
+ # Wait for redirects to complete with longer timeout
127
+ redirect_result = await wait_redirects(
128
+ new_page,
129
+ timeout=60000, # 60 seconds timeout
130
+ show_progress=False,
131
+ track_chain=True,
132
+ auth_aware=True,
133
+ wait_for_idle=True, # Wait for network idle
134
+ )
135
+
136
+ final_url = redirect_result.get("final_url", new_page.url)
137
+ redirect_chain = redirect_result.get("redirect_chain", [])
138
+
139
+ # Additional wait to ensure PDF loads
140
+ await new_page.wait_for_timeout(2000)
141
+
142
+ # Check again for final URL
143
+ final_url = new_page.url
144
+
145
+ await browser_logger.debug(
146
+ page,
147
+ f"{func_name}: Redirect complete after {len(redirect_chain)} steps",
148
+ )
149
+ await browser_logger.debug(
150
+ page, f"{func_name}: Final URL: {final_url[:80]}..."
151
+ )
152
+
153
+ # Check if final URL is a PDF
154
+ if any(
155
+ indicator in final_url
156
+ for indicator in [
157
+ "pdf.sciencedirectassets.com",
158
+ ".pdf",
159
+ "application/pdf",
160
+ "/pdf/",
161
+ "pdfft?",
162
+ ]
163
+ ):
164
+ pdf_urls.append(final_url)
165
+ await browser_logger.debug(
166
+ page, f"{func_name}: Captured final PDF URL via navigation"
167
+ )
168
+ else:
169
+ await browser_logger.debug(
170
+ page,
171
+ f"{func_name}: Final URL doesn't appear to be a PDF: {final_url}",
172
+ )
173
+
174
+ except Exception as e:
175
+ await browser_logger.error(
176
+ page, f"{func_name}: Error navigating to PDF: {e}"
177
+ )
178
+ finally:
179
+ if new_page:
180
+ try:
181
+ await new_page.close()
182
+ except:
183
+ pass
184
+
185
+ return pdf_urls
186
+
187
+ except Exception as e:
188
+ await browser_logger.error(
189
+ page, f"{func_name}: Error finding PDFs via navigation: {e}"
190
+ )
191
+ return []
192
+
193
+
194
+ async def find_pdf_url_from_sciencedirect_api(
195
+ page: Page, func_name="find_pdf_url_from_sciencedirect_api"
196
+ ) -> Optional[str]:
197
+ """
198
+ Extract PDF URL from ScienceDirect page using their JavaScript context.
199
+
200
+ ScienceDirect pages often have the PDF URL in JavaScript variables.
201
+ """
202
+ try:
203
+ # Try to extract from page's JavaScript context
204
+ pdf_info = await page.evaluate(
205
+ """
206
+ () => {
207
+ // Look for PDF URL in various places
208
+
209
+ // Check for download URL in page data
210
+ const pdfLinks = document.querySelectorAll('a[href*="/pdfft?"]');
211
+ if (pdfLinks.length > 0) {
212
+ return pdfLinks[0].href;
213
+ }
214
+
215
+ // Check window.SDM object (ScienceDirect)
216
+ if (window.SDM && window.SDM.pdfUrl) {
217
+ return window.SDM.pdfUrl;
218
+ }
219
+
220
+ // Check for entitlement info
221
+ if (window.SD && window.SD.article && window.SD.article.pdfDownloadUrl) {
222
+ return window.SD.article.pdfDownloadUrl;
223
+ }
224
+
225
+ // Look for View PDF button
226
+ const viewPdfBtn = document.querySelector('a[aria-label*="View PDF"]');
227
+ if (viewPdfBtn && viewPdfBtn.href) {
228
+ return viewPdfBtn.href;
229
+ }
230
+
231
+ // Check meta tags
232
+ const pdfMeta = document.querySelector('meta[name="citation_pdf_url"]');
233
+ if (pdfMeta && pdfMeta.content) {
234
+ return pdfMeta.content;
235
+ }
236
+
237
+ return null;
238
+ }
239
+ """
240
+ )
241
+
242
+ if pdf_info:
243
+ await browser_logger.debug(
244
+ page,
245
+ f"{func_name}: Found PDF info from page context: {pdf_info[:80]}...",
246
+ )
247
+ return pdf_info
248
+
249
+ except Exception as e:
250
+ await browser_logger.debug(
251
+ page, f"Could not extract PDF info from page context: {e}"
252
+ )
253
+
254
+ return None
255
+
256
+ # EOF
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-10 01:33:13 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/url/strategies/publisher_patterns.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/url/strategies/publisher_patterns.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ __FILE__ = __file__
15
+
16
+ import re
17
+ from typing import List
18
+
19
+ from scitex.browser.debugging import browser_logger
20
+
21
+
22
+ async def find_pdf_urls_by_publisher_patterns(
23
+ page,
24
+ url: str = None,
25
+ config=None,
26
+ func_name: str = "find_pdf_urls_by_publisher_patterns"
27
+ ) -> List[str]:
28
+ """
29
+ Generate PDF URLs based on publisher-specific URL patterns.
30
+
31
+ Args:
32
+ page: Playwright page object (unused, for signature consistency)
33
+ url: Page URL to analyze (defaults to page.url if not provided)
34
+ config: ScholarConfig instance (unused, for signature consistency)
35
+ func_name: Function name for logging
36
+
37
+ Returns:
38
+ List of PDF URLs generated from patterns
39
+ """
40
+ url = url or page.url
41
+ urls_pdf = []
42
+
43
+ # Nature
44
+ if "nature.com" in url and not url.endswith(".pdf"):
45
+ urls_pdf.append(url.rstrip("/") + ".pdf")
46
+
47
+ # Science
48
+ elif "science.org" in url and "/doi/10." in url and "/pdf/" not in url:
49
+ urls_pdf.append(url.replace("/doi/", "/doi/pdf/"))
50
+
51
+ # Elsevier/ScienceDirect
52
+ elif "sciencedirect.com" in url and "/pii/" in url:
53
+ pii = url.split("/pii/")[-1].split("/")[0].split("?")[0]
54
+ urls_pdf.append(
55
+ f"https://www.sciencedirect.com/science/article/pii/{pii}/pdfft"
56
+ )
57
+
58
+ # Wiley
59
+ elif "wiley.com" in url and "/doi/" in url and "/pdfdirect" not in url:
60
+ urls_pdf.append(url.replace("/doi/", "/doi/pdfdirect/"))
61
+
62
+ # Frontiers
63
+ elif "frontiersin.org" in url and "/full" in url:
64
+ urls_pdf.append(url.replace("/full", "/pdf"))
65
+
66
+ # Springer
67
+ elif (
68
+ "springer.com" in url or "link.springer.com" in url
69
+ ) and "/article/" in url:
70
+ if not url.endswith(".pdf"):
71
+ urls_pdf.append(url.rstrip("/") + ".pdf")
72
+
73
+ # IEEE
74
+ elif "ieee.org" in url and "/document/" in url:
75
+ doc_id = url.split("/document/")[-1].split("/")[0]
76
+ urls_pdf.append(
77
+ f"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber={doc_id}"
78
+ )
79
+
80
+ # IOP Publishing
81
+ elif "iopscience.iop.org" in url and "/article/" in url:
82
+ # Pattern: /article/10.1088/1741-2552/aaf92e
83
+ # PDF URL: /article/10.1088/1741-2552/aaf92e/pdf
84
+ if not url.endswith("/pdf"):
85
+ urls_pdf.append(url.rstrip("/") + "/pdf")
86
+ # Also try: /article/10.1088/1741-2552/aaf92e/pdf/metrics
87
+ # Some IOP articles have different PDF locations
88
+ doi_match = re.search(r"/article/(10\.\d+/[\w\-\.]+)", url)
89
+ if doi_match:
90
+ doi = doi_match.group(1)
91
+ # Alternative PDF locations for IOP
92
+ urls_pdf.append(f"https://iopscience.iop.org/article/{doi}/pdf")
93
+ urls_pdf.append(
94
+ f"https://iopscience.iop.org/article/{doi}/pdf/metrics"
95
+ )
96
+
97
+ # MDPI
98
+ elif "mdpi.com" in url and "/htm" in url:
99
+ urls_pdf.append(url.replace("/htm", "/pdf"))
100
+
101
+ # BMC
102
+ elif "biomedcentral.com" in url and "/articles/" in url:
103
+ urls_pdf.append(url.replace("/articles/", "/track/pdf/"))
104
+
105
+ # PLOS
106
+ elif "plos.org" in url and "/article" in url:
107
+ if "?id=" in url:
108
+ article_id = url.split("?id=")[-1].split("&")[0]
109
+ base_url = url.split("/article")[0]
110
+ urls_pdf.append(
111
+ f"{base_url}/article/file?id={article_id}&type=printable"
112
+ )
113
+ elif "/article/" in url:
114
+ urls_pdf.append(
115
+ url.replace("/article/", "/article/file?id=").split("?")[0]
116
+ + "&type=printable"
117
+ )
118
+
119
+ # Journal of Neuroscience
120
+ if "jneurosci.org" in url and "/content/" in url:
121
+ # Extract volume/issue/page numbers
122
+ match = re.search(r"/content/(\d+)/(\d+)/(\d+)", url)
123
+ if match:
124
+ vol, issue, page = match.groups()
125
+ urls_pdf.append(
126
+ f"https://www.jneurosci.org/content/jneuro/{vol}/{issue}/{page}.full.pdf"
127
+ )
128
+
129
+ # eNeuro
130
+ elif "eneuro.org" in url and "/content/" in url:
131
+ # Pattern: /content/3/6/ENEURO.0334-16.2016
132
+ match = re.search(r"/content/[^/]+/[^/]+/(ENEURO\.[^/]+)", url)
133
+ if match:
134
+ eneuro_id = match.group(1)
135
+ urls_pdf.append(
136
+ f"https://www.eneuro.org/content/eneuro/early/recent/{eneuro_id}.full.pdf"
137
+ )
138
+
139
+ # Oxford Academic
140
+ elif "academic.oup.com" in url:
141
+ urls_pdf.append(url.replace("/article/", "/article-pdf/"))
142
+
143
+ # Improve preprint handling
144
+ elif "biorxiv.org" in url or "medrxiv.org" in url:
145
+ # Handle versioned URLs better
146
+ if "/v" in url: # e.g., /v1, /v2
147
+ base_url = url.split("/v")[0]
148
+ urls_pdf.append(f"{base_url}.full.pdf")
149
+ else:
150
+ urls_pdf.append(url + ".full.pdf")
151
+
152
+ elif "arxiv.org" in url:
153
+ if "/abs/" in url:
154
+ arxiv_id = url.split("/abs/")[-1]
155
+ urls_pdf.append(f"https://arxiv.org/pdf/{arxiv_id}.pdf")
156
+
157
+ if urls_pdf:
158
+ await browser_logger.debug(
159
+ page,
160
+ f"{func_name}: Pattern matching found {len(urls_pdf)} URLs"
161
+ )
162
+
163
+ return urls_pdf
164
+
165
+ # EOF
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-13 06:32:08 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ __FILE__ = __file__
15
+
16
+ """
17
+ Find PDF URLs using Python Zotero translators.
18
+
19
+ This module uses the zotero-translators-python package instead of running
20
+ JavaScript translators. It provides better performance, reliability, and
21
+ maintainability compared to the JavaScript-based approach.
22
+
23
+ Features:
24
+ - 100+ Python translators available
25
+ - No JavaScript execution overhead
26
+ - Better error handling
27
+ - Easier debugging
28
+ - Type safety
29
+
30
+ Usage:
31
+ from scitex.scholar.url_finder.strategies import find_pdf_urls_by_zotero_translators
32
+
33
+ pdf_urls = await find_pdf_urls_by_zotero_translators(page, url)
34
+ """
35
+
36
+ from typing import List
37
+
38
+ from playwright.async_api import Page
39
+ from zotero_translators_python.core.registry import TranslatorRegistry
40
+
41
+ from scitex import logging
42
+ from scitex.browser import browser_logger
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
+ async def find_pdf_urls_by_zotero_translators(
48
+ page: Page,
49
+ url: str = None,
50
+ config=None,
51
+ func_name: str = "find_pdf_urls_by_zotero_translators",
52
+ ) -> List[str]:
53
+ """
54
+ Find PDF URLs using Python-based Zotero translators.
55
+
56
+ This is the preferred method over JavaScript translators due to:
57
+ - Better performance (no JS eval overhead)
58
+ - Better reliability (proper error handling)
59
+ - Better maintainability (Python codebase)
60
+ - Better debugging (Python stack traces)
61
+
62
+ Args:
63
+ page: Playwright page object with loaded content
64
+ url: Current page URL (defaults to page.url if not provided)
65
+ config: ScholarConfig instance (unused, for signature consistency)
66
+ func_name: Function name for logging
67
+
68
+ Returns:
69
+ List of PDF URLs extracted by matching translators
70
+ Empty list if no translator matches or extraction fails
71
+
72
+ Examples:
73
+ >>> async with async_playwright() as p:
74
+ ... browser = await p.chromium.launch()
75
+ ... page = await browser.new_page()
76
+ ... await page.goto("https://www.nature.com/articles/nature12345")
77
+ ... pdf_urls = await find_pdf_urls_by_zotero_translators(page)
78
+ ... print(f"Found {len(pdf_urls)} PDF URLs")
79
+ """
80
+ url = url or page.url
81
+
82
+ try:
83
+ # Get registry of all available translators
84
+ registry = TranslatorRegistry()
85
+
86
+ # Find matching translator for this URL
87
+ matching_translator = registry.get_translator_for_url(url)
88
+
89
+ if not matching_translator:
90
+ await browser_logger.info(
91
+ page, f"{func_name}: No Python translator matches URL: {url}"
92
+ )
93
+ return []
94
+
95
+ # Try the matching translator
96
+ all_pdf_urls = []
97
+ try:
98
+ # logger.debug(
99
+ # f"{func_name}: Trying {matching_translator.LABEL} translator..."
100
+ # )
101
+ # logger.debug(f"{func_name}: Page URL: {page.url}")
102
+ # logger.debug(f"{func_name}: Translator target URL: {url}")
103
+
104
+ # Extract PDF URLs using the translator
105
+ pdf_urls = await matching_translator.extract_pdf_urls_async(page)
106
+
107
+ # logger.debug(f"{func_name}: Translator returned: {pdf_urls}")
108
+
109
+ if pdf_urls:
110
+ await browser_logger.debug(
111
+ page,
112
+ f"{func_name}: {matching_translator.LABEL} found {len(pdf_urls)} PDF URL(s)",
113
+ )
114
+ for i_pdf, pdf_url in enumerate(pdf_urls, 1):
115
+ await browser_logger.debug(
116
+ page, f"{func_name} {i_pdf}. {pdf_url}"
117
+ )
118
+
119
+ all_pdf_urls.extend(pdf_urls)
120
+ else:
121
+ await browser_logger.warning(
122
+ page,
123
+ f"{func_name}: {matching_translator.LABEL} returned empty list - check if page loaded correctly",
124
+ )
125
+
126
+ except Exception as e:
127
+ import traceback
128
+
129
+ await browser_logger.error(
130
+ page,
131
+ f"{func_name}: {matching_translator.LABEL} extraction failed: {e}\nTraceback: {traceback.format_exc()}",
132
+ )
133
+
134
+ # logger.debug(f"{func_name}: Traceback: {traceback.format_exc()}")
135
+
136
+ # Remove duplicates while preserving order
137
+ seen = set()
138
+ unique_urls = []
139
+ for url_pdf in all_pdf_urls:
140
+ if url_pdf not in seen:
141
+ seen.add(url_pdf)
142
+ unique_urls.append(url_pdf)
143
+
144
+ if unique_urls:
145
+ await browser_logger.debug(
146
+ page,
147
+ f"{func_name}: ✓ Python Zotero found {len(unique_urls)} URLs",
148
+ )
149
+ else:
150
+ await browser_logger.debug(
151
+ page, f"{func_name}: No PDF URLs found by Python translators"
152
+ )
153
+
154
+ return unique_urls
155
+
156
+ except Exception as e:
157
+ await browser_logger.warning(
158
+ page,
159
+ f"{func_name}: Zotero strategy failed: {e}",
160
+ )
161
+ return []
162
+
163
+ # EOF
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-11 01:19:50 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/url/strategies/find_supplementary_urls_by_href.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/url/strategies/find_supplementary_urls_by_href.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ __FILE__ = __file__
15
+
16
+ from typing import Dict, List
17
+
18
+ from playwright.async_api import Page
19
+
20
+ from scitex import logging
21
+ from scitex.browser import browser_logger
22
+ from scitex.scholar import ScholarConfig
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ async def find_supplementary_urls_by_href(
28
+ page: Page, config: ScholarConfig = None
29
+ ) -> List[Dict]:
30
+ """Find supplementary material URLs in a web page."""
31
+ await browser_logger.debug(page, "Finding Supplementary URLs...")
32
+
33
+ config = config or ScholarConfig()
34
+ supplementary_selectors = config.resolve(
35
+ "supplementary_selectors",
36
+ default=[
37
+ 'a[href*="supplementary"]',
38
+ 'a[href*="supplement"]',
39
+ 'a[href*="additional"]',
40
+ ],
41
+ )
42
+
43
+ try:
44
+ supplementary = await page.evaluate(
45
+ f"""() => {{
46
+ const results = [];
47
+ const selectors = {supplementary_selectors};
48
+ const seen_urls = new Set();
49
+
50
+ selectors.forEach(selector => {{
51
+ document.querySelectorAll(selector).forEach(link => {{
52
+ if (link.href && !seen_urls.has(link.href)) {{
53
+ seen_urls.add(link.href);
54
+ results.push({{
55
+ url: link.href,
56
+ description: link.textContent.trim(),
57
+ source: 'href_pattern'
58
+ }});
59
+ }}
60
+ }});
61
+ }});
62
+ return results;
63
+ }}"""
64
+ )
65
+ return supplementary
66
+ except Exception as e:
67
+ logger.error(f"Error finding supplementary URLs: {e}")
68
+ return []
69
+
70
+ # EOF
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Scholar utilities - Organized by function.
5
+
6
+ Structure:
7
+ - text/: Text processing utilities (TextNormalizer)
8
+ - bibtex/: BibTeX parsing utilities
9
+ - cleanup/: Maintenance and cleanup scripts
10
+
11
+ For backward compatibility, TextNormalizer is re-exported at top level.
12
+ """
13
+
14
+ from .text import TextNormalizer
15
+ from .bibtex import parse_bibtex
16
+
17
+ __all__ = [
18
+ "TextNormalizer", # Most commonly used
19
+ "parse_bibtex",
20
+ ]
21
+
22
+ # EOF
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """BibTeX processing utilities for Scholar module."""
4
+
5
+ from ._parse_bibtex import parse_bibtex
6
+
7
+ __all__ = ["parse_bibtex"]
8
+
9
+ # EOF