scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,767 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-13 09:05:52 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/pipelines/ScholarPipelineSingle.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/pipelines/ScholarPipelineSingle.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ """
15
+ Functionalities:
16
+ - Orchestrates full paper acquisition pipeline from query to storage
17
+ - Single command: query (DOI/title) + project → complete paper in library
18
+ - Coordinates all workers: metadata, URLs, download, extraction, storage
19
+ - Supports resumable processing (checks existing data at each step)
20
+ - Creates MASTER/{8-digit-ID}/ and project symlinks
21
+
22
+ Dependencies:
23
+ - packages:
24
+ - playwright
25
+ - pydantic
26
+
27
+ IO:
28
+ - input-files:
29
+ - None (starts from query string)
30
+
31
+ - output-files:
32
+ - library/MASTER/{paper_id}/metadata.json
33
+ - library/MASTER/{paper_id}/main.pdf
34
+ - library/MASTER/{paper_id}/content.txt
35
+ - library/{project}/{paper_id} -> ../MASTER/{paper_id}
36
+ """
37
+
38
+ """Imports"""
39
+ import argparse
40
+ import asyncio
41
+ import hashlib
42
+ from pathlib import Path
43
+ from typing import Optional
44
+
45
+ from scitex import logging
46
+ from scitex.scholar.core import Paper
47
+ from scitex.scholar.storage import PaperIO
48
+
49
+ logger = logging.getLogger(__name__)
50
+
51
+ """Functions & Classes"""
52
+ class ScholarPipelineSingle:
53
+ """Orchestrates full paper acquisition pipeline"""
54
+
55
+ def __init__(
56
+ self, browser_mode: str = "interactive", chrome_profile: str = "system"
57
+ ):
58
+ self.name = self.__class__.__name__
59
+ self.browser_mode = browser_mode
60
+ self.chrome_profile = chrome_profile
61
+
62
async def process_single_paper(
    self,
    doi_or_title: str,
    project: Optional[str] = None,
    force: bool = False,
) -> "tuple[Paper, Optional[Path]]":
    """Process a single paper from query (DOI or title) to complete storage.

    Pipeline:
        1. Detect whether the query already is a DOI
        2. Create the Paper object (resolving the DOI from the title if needed)
        3. Assign the library ID derived from the DOI
        4. Resolve metadata and enrich the impact factor
        5. Set up an authenticated browser (only when still needed)
        6. Find PDF URLs
        7. Download the PDF (with manual-download fallback)
        8. Extract text content from the PDF
        9. Link the paper into the project (if specified)
        10. Log the final file status

    Args:
        doi_or_title: DOI or title string.
        project: Optional project name for symlinking.
        force: If True, ignore existing files and force fresh processing.

    Returns:
        Tuple ``(paper, symlink_path)``; ``symlink_path`` is None when no
        project was given.

    Raises:
        ValueError: Propagated from step 2 when no DOI can be resolved
            from a title.
    """
    # Step 1. Detect whether the query already is a DOI
    doi = self._step_01_normalize_as_doi(doi_or_title)

    # Step 2. Create Paper object (resolves DOI from title if needed)
    paper = await self._step_02_create_paper(doi, doi_or_title)

    # Step 3. Assign the library ID derived from the DOI
    paper = self._step_03_add_paper_id(paper)

    io = PaperIO(paper)
    logger.info(f"{self.name}: Paper directory: {io.paper_dir}")
    with logger.to(io.paper_dir / "logs" / "pipeline.log"):
        # Step 4. Resolve metadata (includes impact factor enrichment)
        paper = await self._step_04_resolve_metadata(paper, io, force)

        # Step 5. Set up browser (all three values are None when not needed)
        browser_manager, context, auth_gateway = (
            await self._step_05_setup_browser(paper, io)
        )
        try:
            if context:
                # Step 6. Find PDF URLs
                await self._step_06_find_pdf_urls(
                    paper, io, context, auth_gateway, force
                )

                # Step 7. Download PDF (with manual mode support)
                await self._step_07_download_pdf(
                    paper, io, context, auth_gateway, force
                )
        finally:
            # Always release the browser, even when steps 6/7 raise.
            if browser_manager:
                await browser_manager.close()

        # Step 8. Extract content
        self._step_08_extract_content(io, force)

        # Step 9. Link to project (if specified)
        symlink_path = self._step_09_link_to_project(paper, io, project)

        # Step 10. Log final status
        self._step_10_log_final_status(io)

    return paper, symlink_path
131
+
132
+ # ----------------------------------------
133
+ # Steps
134
+ # ----------------------------------------
135
def _step_01_normalize_as_doi(self, doi_or_title):
    """Return the query as a bare DOI, or None when it is not a DOI.

    Besides bare DOIs (``10.xxxx/...``), the common ``doi:`` and
    ``http(s)://(dx.)doi.org/`` spellings are accepted and reduced to
    the bare form. Anything else is treated as a title.

    Args:
        doi_or_title: Raw user query.

    Returns:
        The stripped bare DOI string, or None.
    """
    logger.info(f"{self.name}: Processing Query: {doi_or_title}")
    candidate = doi_or_title.strip()
    lowered = candidate.lower()
    for prefix in (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ):
        if lowered.startswith(prefix):
            candidate = candidate[len(prefix):].strip()
            break
    return candidate if candidate.startswith("10.") else None
140
+
141
async def _step_02_create_paper(self, doi, doi_or_title):
    """Build a Paper carrying a DOI, looking it up by title if necessary.

    Args:
        doi: Bare DOI from step 1, or None when the query is a title.
        doi_or_title: Original query; used as the title for the lookup.

    Returns:
        A new Paper whose ``metadata.id.doi`` is set.

    Raises:
        ValueError: When the title search yields no DOI.
    """
    paper = Paper()

    if doi:
        # DOI supplied by the user: record it and skip the lookup.
        paper.metadata.id.doi = doi
        paper.metadata.id.doi_engines = ["user_input"]
        return paper

    from scitex.scholar.metadata_engines import ScholarEngine

    found = await ScholarEngine().search_async(title=doi_or_title)
    has_doi = found and found.get("id", {}).get("doi")
    if not has_doi:
        logger.error(
            f"{self.name}: Could not resolve DOI from title: {doi_or_title}"
        )
        raise ValueError(f"No DOI found for title: {doi_or_title}")

    id_section = found["id"]
    doi = id_section["doi"]
    paper.metadata.id.doi = doi
    paper.metadata.id.doi_engines = id_section.get(
        "doi_engines", ["ScholarEngine"]
    )
    logger.success(f"{self.name}: DOI resolved from title: {doi}")

    # The search result is already in hand, so merge the rest of it now.
    self._merge_metadata_into_paper(paper, found)
    return paper
172
+
173
def _step_03_add_paper_id(self, paper):
    """Store the DOI-derived library ID on ``paper.container`` and return paper."""
    library_id = self._generate_paper_id(paper.metadata.id.doi)
    paper.container.library_id = library_id
    logger.info(f"{self.name}: Library ID: {library_id}")
    return paper
178
+
179
async def _step_04_resolve_metadata(self, paper, io, force):
    """Fetch metadata by DOI, or reuse stored metadata when present.

    Args:
        paper: Paper with ``metadata.id.doi`` set.
        io: PaperIO used to check, load and save metadata.
        force: When True, always query the engines again.

    Returns:
        The paper to continue with: the one passed in, or the one loaded
        from storage when metadata already existed and ``force`` is False.
    """
    must_resolve = (not io.has_metadata()) or force

    if must_resolve:
        logger.info(f"{self.name}: Resolving metadata...")
        from scitex.scholar.metadata_engines import ScholarEngine

        fetched = await ScholarEngine().search_async(
            doi=paper.metadata.id.doi
        )
        if not fetched:
            # Leave a marker title so a later run knows to retry enrichment.
            paper.metadata.basic.title = "Pending metadata resolution"
            paper.metadata.basic.title_engines = ["pending"]
            io.save_metadata()
            logger.warning(
                f"{self.name}: No metadata found from engines, saved basic metadata"
            )
            return paper

        self._merge_metadata_into_paper(paper, fetched)
        self._enrich_impact_factor(paper)
        io.save_metadata()
        logger.success(
            f"{self.name}: Metadata enriched from search engines"
        )
        return paper

    logger.info(f"{self.name}: Metadata exists, loading...")
    paper = io.load_metadata()
    if paper.metadata.basic.title != "Pending metadata resolution":
        return paper

    # Stored metadata is only the placeholder from an earlier failed lookup.
    logger.info(f"{self.name}: Enriching existing metadata...")
    from scitex.scholar.metadata_engines import ScholarEngine

    fetched = await ScholarEngine().search_async(doi=paper.metadata.id.doi)
    if fetched:
        self._merge_metadata_into_paper(paper, fetched)
        self._enrich_impact_factor(paper)
        io.save_metadata()
        logger.success(
            f"{self.name}: Metadata enriched from search engines"
        )
    return paper
221
+
222
async def _step_05_setup_browser(self, paper, io):
    """Create an authenticated browser context when one is still needed.

    A browser is needed when the paper has no PDF URLs yet or no PDF is
    stored.

    Returns:
        ``(browser_manager, context, auth_gateway)``, or
        ``(None, None, None)`` when no browser is required.
    """
    if paper.metadata.url.pdfs and io.has_pdf():
        return None, None, None

    from scitex.scholar import ScholarAuthManager, ScholarBrowserManager
    from scitex.scholar.auth import AuthenticationGateway

    logger.info(
        f"{self.name}: Setting up browser (profile: {self.chrome_profile})..."
    )
    auth_manager = ScholarAuthManager()
    manager = ScholarBrowserManager(
        chrome_profile_name=self.chrome_profile,
        browser_mode=self.browser_mode,
        auth_manager=auth_manager,
    )
    # Only the context is used below; the browser handle is not needed here.
    _browser, context = (
        await manager.get_authenticated_browser_and_context_async()
    )
    gateway = AuthenticationGateway(
        auth_manager=auth_manager,
        browser_manager=manager,
    )
    return manager, context, gateway
246
+
247
async def _step_06_find_pdf_urls(
    self, paper, io, context, auth_gateway, force
):
    """Discover PDF URLs and store them in ``paper.metadata.url.pdfs``.

    Skipped when URLs are already recorded and ``force`` is False.
    The start location is the URL reported by the auth gateway, falling
    back to the DOI when the gateway returns nothing or raises.
    """
    known_urls = paper.metadata.url.pdfs
    if known_urls and not force:
        logger.info(
            f"{self.name}: PDF URLs exist in metadata ({len(paper.metadata.url.pdfs)} URLs)"
        )
        return

    logger.info(f"{self.name}: Finding PDF URLs...")
    try:
        url_context = await auth_gateway.prepare_context_async(
            doi=paper.metadata.id.doi,
            context=context,
        )
        if url_context:
            publisher_url = url_context.url
        else:
            publisher_url = paper.metadata.id.doi
    except Exception as e:
        logger.warning(f"{self.name}: Auth gateway failed: {e}")
        publisher_url = paper.metadata.id.doi

    from scitex.scholar import ScholarURLFinder

    urls = await ScholarURLFinder(context).find_pdf_urls(publisher_url)
    paper.metadata.url.pdfs = urls
    paper.metadata.url.pdfs_engines = ["ScholarURLFinder"]
    io.save_metadata()
    logger.info(f"{self.name}: Found {len(urls)} PDF URL(s)")
275
+
276
async def _step_07_download_pdf_from_url(
    self, paper, io, context, auth_gateway, downloader
):
    """Download the first recorded PDF URL to ``<paper_dir>/temp.pdf``.

    Args:
        paper: Paper whose ``metadata.url.pdfs`` is non-empty; entries may
            be plain URL strings or dicts with a ``"url"`` key.
        io: PaperIO providing ``paper_dir``.
        context: Browser context handed to the auth gateway.
        auth_gateway: Gateway whose ``prepare_context_async`` is awaited
            before downloading.
        downloader: Object providing ``download_from_url``.

    Returns:
        ``(downloaded_file, temp_pdf_path)`` where ``downloaded_file`` is
        whatever ``downloader.download_from_url`` returned.
    """
    pdf_url = paper.metadata.url.pdfs[0]
    if isinstance(pdf_url, dict):
        pdf_url = pdf_url["url"]
    logger.info(f"{self.name}: PDF URL: {pdf_url}")

    # The return value is unused; the call is kept for its effect on
    # ``context`` (presumably authentication -- confirm against
    # AuthenticationGateway). A failure here must not stop the download.
    try:
        await auth_gateway.prepare_context_async(
            doi=paper.metadata.id.doi,
            context=context,
        )
    except Exception as e:
        logger.warning(f"{self.name}: Auth gateway failed: {e}")

    temp_pdf_path = io.paper_dir / "temp.pdf"
    downloaded_file = await downloader.download_from_url(
        pdf_url,
        output_path=temp_pdf_path,
        doi=paper.metadata.id.doi,
    )
    return downloaded_file, temp_pdf_path
297
+
298
def _step_07_handle_downloaded_pdf(
    self, paper, io, downloaded_file, temp_pdf_path
):
    """Place a freshly downloaded PDF at its final location.

    When the downloader wrote to ``temp_pdf_path`` the file is moved to
    ``io.get_pdf_path()`` and the paper's path/size fields are updated;
    otherwise the file is handed to ``io.save_pdf``.
    """
    landed_on_temp = (
        downloaded_file == temp_pdf_path and temp_pdf_path.exists()
    )

    if not landed_on_temp:
        io.save_pdf(downloaded_file)
        io.save_metadata()
        logger.info(
            f"{self.name}: PDF downloaded and saved ({str(downloaded_file)})"
        )
        logger.info(f"{self.name}: Updated metadata.path.pdfs")
        return

    import shutil

    main_pdf = io.get_pdf_path()
    shutil.move(str(temp_pdf_path), str(main_pdf))
    paper.metadata.path.pdfs = [str(main_pdf)]
    paper.container.pdf_size_bytes = main_pdf.stat().st_size
    io.save_metadata()
    logger.success(
        f"{self.name}: PDF downloaded directly to MASTER ({str(main_pdf)})"
    )
319
+
320
def _step_07_check_manual_download(self, io):
    """Adopt the newest recent file from the downloads directory, if any.

    Any regular file larger than 100,000 bytes and modified within the
    last 600 seconds counts as a candidate; the most recently modified
    one is saved through ``io.save_pdf``.
    """
    logger.warning(f"{self.name}: Automated download returned None")
    logger.info(
        f"{self.name}: Checking downloads directory for manual downloads..."
    )
    import time

    from scitex.scholar import ScholarConfig

    downloads_dir = ScholarConfig().get_library_downloads_dir()
    now = time.time()

    candidates = []
    for entry in downloads_dir.glob("*"):
        if not entry.is_file() or entry.stat().st_size <= 100_000:
            continue
        age = now - entry.stat().st_mtime
        if age < 600:
            candidates.append((entry, age))

    if not candidates:
        logger.warning(
            f"{self.name}: No recent PDFs found in downloads directory"
        )
        logger.warning(
            f"{self.name}: PDF download incomplete - manual intervention required"
        )
        return

    # Smallest age == most recently modified; ties keep glob order.
    latest_pdf = min(candidates, key=lambda pair: pair[1])[0]
    logger.info(
        f"{self.name}: Found recent PDF: {latest_pdf.name} ({latest_pdf.stat().st_size / 1e6:.2f} MB)"
    )
    logger.info(
        f"{self.name}: Assuming this is the manually downloaded PDF"
    )
    io.save_pdf(latest_pdf)
    io.save_metadata()
    logger.success(
        f"{self.name}: Manually downloaded PDF saved to MASTER"
    )
    logger.info(f"{self.name}: Updated metadata.path.pdfs")
360
+
361
async def _step_07_download_pdf(
    self, paper, io, context, auth_gateway, force
):
    """Download the PDF when it is missing (or forced) and URLs are known.

    Falls back to scanning the downloads directory when the automated
    download returns a falsy result.
    """
    wants_download = (not io.has_pdf() or force) and paper.metadata.url.pdfs

    if not wants_download:
        if io.has_pdf():
            logger.info(f"{self.name}: PDF already exists, skipping download")
        return

    logger.info(f"{self.name}: Downloading PDF...")
    from scitex.scholar.pdf_download import ScholarPDFDownloader

    downloader = ScholarPDFDownloader(context)
    downloaded_file, temp_pdf_path = (
        await self._step_07_download_pdf_from_url(
            paper, io, context, auth_gateway, downloader
        )
    )
    if not downloaded_file:
        self._step_07_check_manual_download(io)
        return
    self._step_07_handle_downloaded_pdf(
        paper, io, downloaded_file, temp_pdf_path
    )
382
+
383
def _step_08_extract_content(self, io, force):
    """Extract text from the stored PDF and save it via ``io.save_text``.

    Runs only when a PDF exists and either no content is stored yet or
    ``force`` is True. Extraction is best-effort: failures are logged as
    warnings and never raised.
    """
    if not io.has_pdf():
        return
    if io.has_content() and not force:
        return

    logger.info(f"{self.name}: Extracting content...")
    import scitex

    try:
        pdf_path = io.get_pdf_path()
        content = scitex.io.load(str(pdf_path), ext="pdf")
        if hasattr(content, "full_text"):
            io.save_text(content.full_text)
            logger.info(f"{self.name}: Content extracted")
        else:
            # Report the skip explicitly instead of finishing silently.
            logger.warning(
                f"{self.name}: Loaded PDF has no full_text; no content saved"
            )
    except Exception as e:
        logger.warning(f"{self.name}: Content extraction failed: {e}")
396
+
397
def _step_09_link_to_project(self, paper, io, project):
    """Create the project symlink when a project is given; else return None."""
    if not project:
        return None
    logger.info(f"{self.name}: Linking to project: {project}")
    return self._link_to_project(paper, project, io)
402
+
403
def _step_10_log_final_status(self, io):
    """Log one check/cross line per entry reported by ``io.get_all_files()``."""
    logger.info(f"{self.name}: Complete")
    for filename, exists in io.get_all_files().items():
        logger.info(f" {'✓' if exists else '✗'} {filename}")
408
+
409
+ # ----------------------------------------
410
+ # Helper functions
411
+ # ----------------------------------------
412
def _link_to_project(
    self, paper: Paper, project: str, io: PaperIO
) -> Path:
    """Create a human-readable symlink to the paper in the project directory.

    The entry name is produced by the config's path manager from the PDF
    count, citation count, impact factor, year, first author's last name
    token and journal name. The link target is the relative path
    ``../MASTER/<library_id>``.

    Args:
        paper: Paper providing metadata and ``container.library_id``.
        project: Project name.
        io: PaperIO whose ``paper_dir`` is scanned for ``*.pdf`` files.

    Returns:
        Path of the created symlink.
    """
    from scitex.scholar import ScholarConfig

    config = ScholarConfig()
    project_dir = config.path_manager.get_library_project_dir(project)

    # Gather data for entry name
    n_pdfs = len(list(io.paper_dir.glob("*.pdf")))
    citation_count = paper.metadata.citation_count.total or 0
    impact_factor = int(paper.metadata.publication.impact_factor or 0)
    year = paper.metadata.basic.year or 0

    # An empty or whitespace-only first author yields no name tokens;
    # fall back to "Unknown" instead of raising IndexError.
    authors = paper.metadata.basic.authors
    name_tokens = (authors[0] or "").split() if authors else []
    first_author = name_tokens[-1] if name_tokens else "Unknown"

    journal_name = (
        paper.metadata.publication.short_journal
        or paper.metadata.publication.journal
        or "Unknown"
    )

    # Use PathManager to generate entry name (single source of truth)
    entry_name = config.path_manager.get_library_project_entry_dirname(
        n_pdfs=n_pdfs,
        citation_count=citation_count,
        impact_factor=impact_factor,
        year=year,
        first_author=first_author,
        journal_name=journal_name,
    )

    symlink_path = project_dir / entry_name
    target_path = Path("../MASTER") / paper.container.library_id

    # exists() is False for a dangling link, hence the extra is_symlink().
    if symlink_path.exists() or symlink_path.is_symlink():
        symlink_path.unlink()

    # Keep the target relative (do not resolve to an absolute path).
    symlink_path.symlink_to(target_path)
    logger.success(f"{self.name}: Created symlink: {project}/{entry_name}")

    return symlink_path
461
+
462
+ def _generate_paper_id(self, doi: str) -> str:
463
+ """Generate 8-digit library ID from DOI"""
464
+ content = f"DOI:{doi}"
465
+ return hashlib.md5(content.encode()).hexdigest()[:8].upper()
466
+
467
def _merge_metadata_into_paper(
    self, paper: Paper, metadata_dict: dict
) -> None:
    """Copy known fields from a ScholarEngine result onto ``paper.metadata``.

    Sections are handled in the order id, basic, citation_count,
    publication, url. For every copied field the matching
    ``<field>_engines`` attribute is set as well (empty list when the
    result does not name any engines). ``None`` values are skipped.
    """

    def assign(section, field_name, value, engines):
        """Set one field plus its ``_engines`` companion, coercing types."""
        if value is None:
            return

        # IDs are stored as strings.
        if section == "id" and not isinstance(value, str):
            value = str(value)

        # Year is stored as an integer.
        if field_name == "year" and not isinstance(value, int):
            try:
                value = int(value)
            except (ValueError, TypeError):
                logger.warning(
                    f"{self.name}: Could not convert year '{value}' to int"
                )
                return

        # Citation counts are stored as integers.
        if section == "citation_count" and not isinstance(value, int):
            try:
                value = int(value)
            except (ValueError, TypeError):
                logger.warning(
                    f"{self.name}: Could not convert citation count '{value}' to int"
                )
                return

        try:
            target = getattr(paper.metadata, section)
            setattr(target, field_name, value)
            setattr(target, f"{field_name}_engines", engines)
        except Exception as e:
            logger.warning(
                f"{self.name}: Could not set {section}.{field_name}: {e}"
            )

    def copy_section(section, field_names):
        """Copy the listed fields of one section when the section exists."""
        if section not in metadata_dict:
            return
        data = metadata_dict[section]
        for key in field_names:
            if key in data:
                assign(section, key, data[key], data.get(f"{key}_engines", []))

    copy_section(
        "id",
        (
            "doi",
            "arxiv_id",
            "pmid",
            "corpus_id",
            "semantic_id",
            "ieee_id",
            "scholar_id",
        ),
    )
    copy_section(
        "basic",
        ("title", "authors", "year", "abstract", "keywords", "type"),
    )

    if "citation_count" in metadata_dict:
        counts = metadata_dict["citation_count"]
        if "total" in counts:
            assign(
                "citation_count",
                "total",
                counts["total"],
                counts.get("total_engines", []),
            )
        # Per-year keys such as "2015" map to model fields named "y2015".
        for year in range(2015, 2026):
            key = str(year)
            if key in counts:
                assign(
                    "citation_count",
                    f"y{year}",
                    counts[key],
                    counts.get(f"{key}_engines", []),
                )

    copy_section(
        "publication",
        (
            "journal",
            "short_journal",
            "impact_factor",
            "issn",
            "volume",
            "issue",
            "first_page",
            "last_page",
            "pages",
            "publisher",
        ),
    )
    copy_section("url", ("doi", "publisher", "arxiv", "corpus_id"))

    logger.info(f"{self.name}: Merged metadata into Paper object")
607
+
608
def _enrich_impact_factor(self, paper: Paper) -> None:
    """Fill in the journal impact factor when the paper has none yet.

    The lookup uses the short journal name, falling back to the full
    journal name. Any failure during the lookup is logged at debug level
    and otherwise ignored.
    """
    publication = paper.metadata.publication

    if publication.impact_factor:
        logger.success(f"{self.name}: Impact factor already present")
        return

    journal = publication.short_journal or publication.journal
    if not journal:
        logger.debug(f"{self.name}: No journal name, skipping IF lookup")
        return

    try:
        from scitex.scholar.impact_factor import ImpactFactorEngine

        metrics = ImpactFactorEngine().get_metrics(journal)

        if not (metrics and metrics.get("impact_factor")):
            logger.debug(
                f"{self.name}: No impact factor found for journal: {journal}"
            )
        else:
            publication.impact_factor = metrics["impact_factor"]
            publication.impact_factor_engines = [
                metrics.get("source", "JCR")
            ]
            logger.info(
                f"{self.name}: Impact factor added: {metrics['impact_factor']} (from {metrics.get('source', 'JCR')})"
            )

    except Exception as e:
        logger.debug(f"{self.name}: Impact factor lookup failed: {e}")
648
+
649
+
650
def main(args):
    """Run the single-paper pipeline for parsed CLI arguments; return 0."""
    pipeline = ScholarPipelineSingle(
        browser_mode=args.browser_mode,
        chrome_profile=args.chrome_profile,
    )
    job = pipeline.process_single_paper(
        doi_or_title=args.doi_or_title,
        project=args.project,
        force=args.force,
    )

    # The pipeline returns (paper, symlink_path); neither is used further.
    _paper, _symlink_path = asyncio.run(job)

    return 0
667
+
668
+
669
def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        Namespace with ``doi_or_title``, ``project``, ``browser_mode``,
        ``chrome_profile`` and ``force``.
    """
    parser = argparse.ArgumentParser(
        description="Orchestrate full paper acquisition pipeline"
    )
    parser.add_argument(
        "--doi-or-title",
        type=str,
        required=True,
        help="DOI or paper title",
    )
    parser.add_argument(
        "--project",
        type=str,
        default=None,
        help="Project name for symlinking (optional)",
    )
    parser.add_argument(
        "--browser-mode",
        type=str,
        choices=["stealth", "interactive"],
        default="stealth",
        help="Browser mode (default: stealth)",
    )
    parser.add_argument(
        "--chrome-profile",
        type=str,
        required=True,
        help="Chrome profile name (required; e.g. system, parallel workers: system_worker_0-7)",
    )
    parser.add_argument(
        "--force",
        "-f",
        action="store_true",
        default=False,
        help="Force fresh processing, ignore existing files",
    )
    return parser.parse_args()
709
+
710
+
711
def run_main() -> None:
    """Start a scitex session, run ``main`` with the CLI args, close the session.

    ``stx.session.start`` returns replacements for ``sys.stdout`` and
    ``sys.stderr`` plus session objects, which are published as module
    globals. The exit status returned by ``main`` is forwarded to
    ``stx.session.close``.
    """
    # ``sys`` and ``plt`` are declared global so the imports below bind
    # module-level names rather than locals.
    global CONFIG, CC, sys, plt, rng

    import sys

    import matplotlib.pyplot as plt

    import scitex as stx

    args = parse_args()

    # Statement order matters: the session must start (rebinding the
    # standard streams) before ``main`` runs.
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
        sys,
        plt,
        args=args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )

    exit_status = main(args)

    # NOTE(review): not reached if ``main`` raises -- confirm whether the
    # session should be closed in that case as well.
    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=exit_status,
    )
742
+
743
+
744
# Entry point when run as a script or via ``python -m``.
if __name__ == "__main__":
    run_main()
746
+
747
+ """
748
+ Usage:
749
+
750
+
751
+ # With Title
752
+ python -m scitex.scholar.pipelines.ScholarPipelineSingle \
753
+ --doi-or-title "Epileptic seizure forecasting with long short-term memory (LSTM) neural networks" \
754
+ --project neurovista \
755
+ --chrome-profile system \
756
+ --browser-mode interactive \
757
+ --force
758
+
759
+ # With DOI; Neurology; Manual Download required
760
+ python -m scitex.scholar.pipelines.ScholarPipelineSingle \
761
+ --doi-or-title "10.1212/wnl.0000000000200348" \
762
+ --project neurovista \
763
+ --chrome-profile system \
764
+ --browser-mode interactive
765
+ """
766
+
767
+ # EOF