scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1975 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-13 08:11:40 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/core/Scholar.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/core/Scholar.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ __FILE__ = __file__
15
+
16
+ """
17
+ Unified Scholar class for scientific literature management.
18
+
19
+ This is the main entry point for all scholar functionality, providing:
20
+ - Simple, intuitive API
21
+ - Smart defaults
22
+ - Method chaining
23
+ - Progressive disclosure of advanced features
24
+ """
25
+
26
+ import json
27
+ from datetime import datetime
28
+ from pathlib import Path
29
+ from typing import Any
30
+ from typing import Dict
31
+ from typing import List
32
+ from typing import Optional
33
+ from typing import Union
34
+ from copy import deepcopy
35
+ from scitex import logging
36
+ import shutil
37
+
38
+ # PDF extraction is now handled by scitex.io
39
+ from scitex.errors import ScholarError
40
+ from scitex.scholar.config import ScholarConfig
41
+
42
+ # Updated imports for current architecture
43
+ from scitex.scholar.auth import ScholarAuthManager
44
+ from scitex.browser.debugging import browser_logger
45
+ from scitex.scholar.browser import ScholarBrowserManager
46
+ from scitex.scholar.storage import LibraryManager
47
+ from scitex.scholar.storage import ScholarLibrary
48
+ from scitex.scholar.metadata_engines.ScholarEngine import ScholarEngine
49
+ from scitex.scholar.pdf_download.ScholarPDFDownloader import (
50
+ ScholarPDFDownloader,
51
+ )
52
+ from scitex.scholar.auth.core.AuthenticationGateway import (
53
+ AuthenticationGateway,
54
+ )
55
+ from scitex.scholar.url_finder.ScholarURLFinder import ScholarURLFinder
56
+
57
+ import asyncio
58
+ import nest_asyncio
59
+ from scitex.scholar.impact_factor.ImpactFactorEngine import ImpactFactorEngine
60
+
61
+ from .Papers import Papers
62
+
63
+ logger = logging.getLogger(__name__)
64
+
65
+
66
+ class Scholar:
67
+ """
68
+ Main interface for SciTeX Scholar - scientific literature management made simple.
69
+
70
+ By default, papers are automatically enriched with:
71
+ - Journal impact factors from impact_factor package (2024 JCR data)
72
+ - Citation counts from Semantic Scholar (via DOI/title matching)
73
+
74
+ Example usage:
75
+ # Basic search with automatic enrichment
76
+ scholar = Scholar()
77
+ papers = scholar.search("deep learning neuroscience")
78
+ # Papers now have impact_factor and citation_count populated
79
+ papers.save("my_pac.bib")
80
+
81
+ # Disable automatic enrichment if needed
82
+ config = ScholarConfig(enable_auto_enrich=False)
83
+ scholar = Scholar(config=config)
84
+
85
+ # Search specific source
86
+ papers = scholar.search("transformer models", sources='arxiv')
87
+
88
+ # Advanced workflow
89
+ papers = scholar.search("transformer models", year_min=2020) \\
90
+ .filter(min_citations=50) \\
91
+ .sort_by("impact_factor") \\
92
+ .save("transformers.bib")
93
+
94
+ # Local library
95
+ scholar._index_local_pdfs("./my_papers")
96
+ local_papers = scholar.search_local("attention mechanism")
97
+ """
98
+
99
+ @property
100
+ def name(self):
101
+ """Class name for logging."""
102
+ return self.__class__.__name__
103
+
104
+ def __init__(
105
+ self,
106
+ config: Optional[Union[ScholarConfig, str, Path]] = None,
107
+ project: Optional[str] = None,
108
+ project_description: Optional[str] = None,
109
+ browser_mode: Optional[str] = None,
110
+ ):
111
+ """
112
+ Initialize Scholar with configuration.
113
+
114
+ Args:
115
+ config: Can be:
116
+ - ScholarConfig instance
117
+ - Path to YAML config file (str or Path)
118
+ - None (uses ScholarConfig.load() to find config)
119
+ project: Default project name for operations
120
+ project_description: Optional description for the project
121
+ browser_mode: Browser mode ('stealth', 'interactive', 'manual')
122
+ """
123
+
124
+ self.config = self._init_config(config)
125
+
126
+ # Store browser mode for later use
127
+ self.browser_mode = browser_mode or "stealth"
128
+
129
+ # Set project and workspace
130
+ self.project = self.config.resolve("project", project, "default")
131
+ self.workspace_dir = self.config.path_manager.get_workspace_dir()
132
+
133
+ # Auto-create project directory if it doesn't exist
134
+ if project:
135
+ self._ensure_project_exists(project, project_description)
136
+
137
+ # Initialize service components (lazy loading for better performance)
138
+ # Use mangled names for private properties
139
+ self._Scholar__scholar_engine = (
140
+ None # Replaces DOIResolver and LibraryEnricher
141
+ )
142
+ self._Scholar__auth_manager = None
143
+ self._Scholar__browser_manager = None
144
+ self._Scholar__library_manager = None
145
+ self._Scholar__library = (
146
+ None # ScholarLibrary for high-level operations
147
+ )
148
+
149
+ # Show user-friendly initialization message with library location
150
+ library_path = self.config.get_library_project_dir()
151
+ if project:
152
+ project_path = library_path / project
153
+ logger.info(
154
+ f"Scholar initialized with project '{project}' at {project_path}"
155
+ )
156
+ else:
157
+ logger.info(
158
+ f"{self.name}: Scholar initialized (library: {library_path})"
159
+ )
160
+
161
+ # ----------------------------------------
162
+ # Enrichers
163
+ # ----------------------------------------
164
+ async def enrich_papers_async(self, papers: Papers) -> Papers:
165
+ """Async version of enrich_papers for use in async contexts.
166
+
167
+ Args:
168
+ papers: Papers collection to enrich.
169
+
170
+ Returns:
171
+ Enriched Papers collection
172
+ """
173
+ enriched_list = []
174
+
175
+ for paper in papers:
176
+ try:
177
+ # Use ScholarEngine to search and enrich
178
+ results = await self._scholar_engine.search_async(
179
+ title=paper.metadata.basic.title,
180
+ year=paper.metadata.basic.year,
181
+ authors=(
182
+ paper.metadata.basic.authors[0]
183
+ if paper.metadata.basic.authors
184
+ else None
185
+ ),
186
+ )
187
+
188
+ # Create a copy to avoid modifying original
189
+ enriched_paper = self._merge_enrichment_data(paper, results)
190
+ enriched_list.append(enriched_paper)
191
+ title = paper.metadata.basic.title or "No title"
192
+ logger.info(f"{self.name}: Enriched: {title[:50]}...")
193
+
194
+ except Exception as e:
195
+ title = paper.metadata.basic.title or "No title"
196
+ logger.warning(
197
+ f"{self.name}: Failed to enrich paper '{title[:50]}...': {e}"
198
+ )
199
+ enriched_list.append(
200
+ paper
201
+ ) # Keep original if enrichment fails
202
+
203
+ enriched_papers = Papers(enriched_list, project=self.project)
204
+
205
+ # Add impact factors as post-processing step
206
+ if self.config.resolve("enrich_impact_factors", None, True):
207
+ enriched_papers = self._enrich_impact_factors(enriched_papers)
208
+
209
+ return enriched_papers
210
+
211
+ def enrich_papers(
212
+ self, papers: Optional[Papers] = None
213
+ ) -> Union[Papers, Dict[str, int]]:
214
+ """Enrich papers with metadata from multiple sources.
215
+
216
+ Args:
217
+ papers: Papers collection to enrich. If None, enriches all papers in current project.
218
+
219
+ Returns:
220
+ - If papers provided: Returns enriched Papers collection
221
+ - If no papers: Returns dict with enrichment statistics for project
222
+ """
223
+
224
+ # If no papers provided, enrich entire project
225
+ if papers is None:
226
+ return self._enrich_current_project()
227
+
228
+ # Enrich the provided papers collection
229
+ enriched_list = []
230
+
231
+ nest_asyncio.apply() # Allow nested event loops
232
+
233
+ for paper in papers:
234
+ try:
235
+ # Use ScholarEngine to search and enrich
236
+ results = asyncio.run(
237
+ self._scholar_engine.search_async(
238
+ title=paper.metadata.basic.title,
239
+ year=paper.metadata.basic.year,
240
+ authors=(
241
+ paper.metadata.basic.authors[0]
242
+ if paper.metadata.basic.authors
243
+ else None
244
+ ),
245
+ )
246
+ )
247
+
248
+ # Create a copy to avoid modifying original
249
+ enriched_paper = self._merge_enrichment_data(paper, results)
250
+ enriched_list.append(enriched_paper)
251
+ title = paper.metadata.basic.title or "No title"
252
+ logger.info(f"{self.name}: Enriched: {title[:50]}...")
253
+
254
+ except Exception as e:
255
+ title = paper.metadata.basic.title or "No title"
256
+ logger.warning(
257
+ f"{self.name}: Failed to enrich paper '{title[:50]}...': {e}"
258
+ )
259
+ enriched_list.append(
260
+ paper
261
+ ) # Keep original if enrichment fails
262
+
263
+ enriched_papers = Papers(enriched_list, project=self.project)
264
+
265
+ # Add impact factors as post-processing step
266
+ if self.config.resolve("enrich_impact_factors", None, True):
267
+ enriched_papers = self._enrich_impact_factors(enriched_papers)
268
+
269
+ return enriched_papers
270
+
271
+ def _enrich_impact_factors(self, papers: "Papers") -> "Papers":
272
+ """Add journal impact factors to papers.
273
+
274
+ Args:
275
+ papers: Papers collection to enrich with impact factors
276
+
277
+ Returns:
278
+ Papers collection with impact factors added where available
279
+ """
280
+ try:
281
+ # Try JCR database first (fast)
282
+ jcr_engine = ImpactFactorEngine()
283
+ papers = jcr_engine.enrich_papers(papers)
284
+ return papers
285
+ except Exception as e:
286
+ logger.debug(
287
+ f"{self.name}: JCR engine unavailable: {e}, falling back to calculation method"
288
+ )
289
+
290
+ return papers
291
+
292
+ def _merge_enrichment_data(self, paper: "Paper", results: Dict) -> "Paper":
293
+ """Merge enrichment results into paper object.
294
+
295
+ Creates a new Paper object with merged data to avoid modifying the original.
296
+ """
297
+ # Import here to avoid circular dependency
298
+
299
+ enriched = deepcopy(paper)
300
+
301
+ # Results from ScholarEngine is already combined metadata, not individual engine results
302
+ if not results:
303
+ return enriched
304
+
305
+ # Extract from the combined metadata structure
306
+ # ID section
307
+ if "id" in results:
308
+ if results["id"].get("doi") and not enriched.metadata.id.doi:
309
+ enriched.metadata.set_doi(results["id"]["doi"])
310
+ if results["id"].get("pmid") and not enriched.metadata.id.pmid:
311
+ enriched.metadata.id.pmid = results["id"]["pmid"]
312
+ if (
313
+ results["id"].get("arxiv_id")
314
+ and not enriched.metadata.id.arxiv_id
315
+ ):
316
+ enriched.metadata.id.arxiv_id = results["id"]["arxiv_id"]
317
+ # Note: corpus_id, semantic_id, ieee_id are in results but not in Paper dataclass
318
+
319
+ # Basic metadata section
320
+ if "basic" in results:
321
+ # Always update abstract if found (key enrichment goal)
322
+ if results["basic"].get("abstract"):
323
+ enriched.metadata.basic.abstract = results["basic"]["abstract"]
324
+
325
+ # Update title if more complete
326
+ if results["basic"].get("title"):
327
+ new_title = results["basic"]["title"]
328
+ current_title = enriched.metadata.basic.title or ""
329
+ if not current_title or len(new_title) > len(current_title):
330
+ enriched.metadata.basic.title = new_title
331
+
332
+ # Update authors if found
333
+ if (
334
+ results["basic"].get("authors")
335
+ and not enriched.metadata.basic.authors
336
+ ):
337
+ enriched.metadata.basic.authors = results["basic"]["authors"]
338
+
339
+ # Update year if found
340
+ if (
341
+ results["basic"].get("year")
342
+ and not enriched.metadata.basic.year
343
+ ):
344
+ enriched.metadata.basic.year = results["basic"]["year"]
345
+
346
+ # Update keywords if found
347
+ if (
348
+ results["basic"].get("keywords")
349
+ and not enriched.metadata.basic.keywords
350
+ ):
351
+ enriched.metadata.basic.keywords = results["basic"]["keywords"]
352
+
353
+ # Publication metadata
354
+ if "publication" in results:
355
+ if (
356
+ results["publication"].get("journal")
357
+ and not enriched.metadata.publication.journal
358
+ ):
359
+ enriched.metadata.publication.journal = results["publication"][
360
+ "journal"
361
+ ]
362
+ if (
363
+ results["publication"].get("publisher")
364
+ and not enriched.metadata.publication.publisher
365
+ ):
366
+ enriched.metadata.publication.publisher = results[
367
+ "publication"
368
+ ]["publisher"]
369
+ if (
370
+ results["publication"].get("volume")
371
+ and not enriched.metadata.publication.volume
372
+ ):
373
+ enriched.metadata.publication.volume = results["publication"][
374
+ "volume"
375
+ ]
376
+ if (
377
+ results["publication"].get("issue")
378
+ and not enriched.metadata.publication.issue
379
+ ):
380
+ enriched.metadata.publication.issue = results["publication"][
381
+ "issue"
382
+ ]
383
+ if (
384
+ results["publication"].get("pages")
385
+ and not enriched.metadata.publication.pages
386
+ ):
387
+ enriched.metadata.publication.pages = results["publication"][
388
+ "pages"
389
+ ]
390
+
391
+ # Citation metadata
392
+ if "citation_count" in results:
393
+ # Try both "count" and "total" fields
394
+ count = results["citation_count"].get("count") or results[
395
+ "citation_count"
396
+ ].get("total")
397
+ if count:
398
+ # Always take the maximum citation count
399
+ current_count = enriched.metadata.citation_count.total or 0
400
+ if not current_count or count > current_count:
401
+ enriched.metadata.citation_count.total = count
402
+ # Note: influential_citation_count is in results but not in Paper dataclass
403
+
404
+ # URL metadata
405
+ if "url" in results:
406
+ if results["url"].get("pdf"):
407
+ # Check if this PDF is not already in the list
408
+ pdf_url = results["url"]["pdf"]
409
+ if not any(
410
+ p.get("url") == pdf_url for p in enriched.metadata.url.pdfs
411
+ ):
412
+ enriched.metadata.url.pdfs.append(
413
+ {"url": pdf_url, "source": "enrichment"}
414
+ )
415
+ if (
416
+ results["url"].get("url")
417
+ and not enriched.metadata.url.publisher
418
+ ):
419
+ enriched.metadata.url.publisher = results["url"]["url"]
420
+
421
+ # Note: Metrics section (journal_impact_factor, h_index) not stored in Paper dataclass
422
+
423
+ return enriched
424
+
425
+ def _enrich_current_project(self) -> Dict[str, int]:
426
+ """Enrich all papers in the current project.
427
+
428
+ Returns:
429
+ Dictionary with enrichment statistics
430
+ """
431
+ if not self.project:
432
+ raise ValueError(
433
+ "No project specified. Use Scholar(project='name') or provide papers to enrich()."
434
+ )
435
+
436
+ # Load papers from project library
437
+ papers = self.load_project(self.project)
438
+ logger.info(
439
+ f"{self.name}: Enriching {len(papers)} papers in project '{self.project}'"
440
+ )
441
+
442
+ # Enrich the papers
443
+ enriched_papers = self.enrich_papers(papers)
444
+
445
+ # Count successes
446
+ enriched_count = sum(
447
+ 1
448
+ for i, p in enumerate(enriched_papers)
449
+ if p.abstract
450
+ and not papers[i].abstract # Check if abstract was added
451
+ )
452
+
453
+ # Save enriched papers back to library
454
+ saved_ids = self.save_papers_to_library(enriched_papers)
455
+
456
+ return {
457
+ "enriched": enriched_count,
458
+ "failed": len(papers) - enriched_count,
459
+ "total": len(papers),
460
+ "saved": len(saved_ids),
461
+ }
462
+
463
+ # ----------------------------------------
464
+ # URL Finding (Orchestration)
465
+ # ----------------------------------------
466
+ async def _find_urls_for_doi_async(
467
+ self, doi: str, context
468
+ ) -> Dict[str, Any]:
469
+ """Find all URLs for a DOI (orchestration layer).
470
+
471
+ Workflow:
472
+ DOI → Publisher URL → PDF URLs → OpenURL (fallback)
473
+
474
+ Args:
475
+ doi: DOI string
476
+ context: Authenticated browser context
477
+
478
+ Returns:
479
+ Dictionary with URL information: {
480
+ "url_doi": "https://doi.org/...",
481
+ "url_publisher": "https://publisher.com/...",
482
+ "urls_pdf": [{"url": "...", "source": "zotero_translator"}],
483
+ "url_openurl_resolved": "..." (if fallback used)
484
+ }
485
+ """
486
+ from scitex.scholar.auth.gateway import (
487
+ normalize_doi_as_http,
488
+ resolve_publisher_url_by_navigating_to_doi_page,
489
+ OpenURLResolver,
490
+ )
491
+
492
+ # Initialize result
493
+ urls = {"url_doi": normalize_doi_as_http(doi)}
494
+
495
+ # Step 1: Resolve publisher URL
496
+ page = await context.new_page()
497
+ try:
498
+ url_publisher = (
499
+ await resolve_publisher_url_by_navigating_to_doi_page(
500
+ doi, page
501
+ )
502
+ )
503
+ urls["url_publisher"] = url_publisher
504
+ finally:
505
+ await page.close()
506
+
507
+ # Step 2: Find PDF URLs from publisher URL
508
+ url_finder = ScholarURLFinder(context, config=self.config)
509
+ urls_pdf = []
510
+
511
+ if url_publisher:
512
+ urls_pdf = await url_finder.find_pdf_urls(url_publisher)
513
+
514
+ # Step 3: Try OpenURL fallback if no PDFs found
515
+ if not urls_pdf:
516
+ openurl_resolver = OpenURLResolver(config=self.config)
517
+ page = await context.new_page()
518
+ try:
519
+ url_openurl_resolved = await openurl_resolver.resolve_doi(
520
+ doi, page
521
+ )
522
+ urls["url_openurl_resolved"] = url_openurl_resolved
523
+
524
+ if url_openurl_resolved and url_openurl_resolved != "skipped":
525
+ urls_pdf = await url_finder.find_pdf_urls(
526
+ url_openurl_resolved
527
+ )
528
+ finally:
529
+ await page.close()
530
+
531
+ # Deduplicate and store
532
+ urls["urls_pdf"] = (
533
+ self._deduplicate_pdf_urls(urls_pdf) if urls_pdf else []
534
+ )
535
+
536
+ return urls
537
+
538
+ def _deduplicate_pdf_urls(self, urls_pdf: List[Dict]) -> List[Dict]:
539
+ """Remove duplicate PDF URLs.
540
+
541
+ Args:
542
+ urls_pdf: List of PDF URL dicts
543
+
544
+ Returns:
545
+ Deduplicated list of PDF URL dicts
546
+ """
547
+ seen = set()
548
+ unique = []
549
+ for pdf in urls_pdf:
550
+ url = pdf.get("url") if isinstance(pdf, dict) else pdf
551
+ if url not in seen:
552
+ seen.add(url)
553
+ unique.append(pdf)
554
+ return unique
555
+
556
+ # ----------------------------------------
557
+ # PDF Downloaders
558
+ # ----------------------------------------
559
+ async def download_pdfs_from_dois_async(
560
+ self,
561
+ dois: List[str],
562
+ output_dir: Optional[Path] = None,
563
+ max_concurrent: int = 1,
564
+ ) -> Dict[str, int]:
565
+ """Download PDFs for given DOIs using ScholarPDFDownloader.
566
+
567
+ Args:
568
+ dois: List of DOI strings
569
+ output_dir: Output directory (not used - downloads to library MASTER)
570
+ max_concurrent: Maximum concurrent downloads (default: 1 for sequential)
571
+
572
+ Returns:
573
+ Dictionary with download statistics
574
+ """
575
+ if not dois:
576
+ return {"downloaded": 0, "failed": 0, "errors": 0}
577
+
578
+ # Get authenticated browser context
579
+ browser, context = (
580
+ await self._browser_manager.get_authenticated_browser_and_context_async()
581
+ )
582
+
583
+ try:
584
+ # Initialize PDF downloader with browser context
585
+ pdf_downloader = ScholarPDFDownloader(
586
+ context=context,
587
+ config=self.config,
588
+ )
589
+
590
+ # Use download_from_dois from ScholarPDFDownloader
591
+ # This handles parallel downloads with semaphore control
592
+ logger.info(
593
+ f"{self.name}: Starting PDF download for {len(dois)} DOIs (max_concurrent={max_concurrent})"
594
+ )
595
+
596
+ results = await pdf_downloader.download_from_dois(
597
+ dois=dois,
598
+ output_dir=str(output_dir) if output_dir else "/tmp/",
599
+ max_concurrent=max_concurrent,
600
+ )
601
+
602
+ # Process results and organize in library
603
+ stats = {"downloaded": 0, "failed": 0, "errors": 0}
604
+ library_dir = self.config.get_library_project_dir()
605
+ master_dir = library_dir / "MASTER"
606
+ master_dir.mkdir(parents=True, exist_ok=True)
607
+
608
+ for doi, downloaded_paths in zip(dois, results):
609
+ try:
610
+ if downloaded_paths and len(downloaded_paths) > 0:
611
+ # PDF was downloaded successfully
612
+ # Take the first downloaded PDF (if multiple)
613
+ temp_pdf_path = downloaded_paths[0]
614
+
615
+ # Generate paper ID and create storage
616
+ paper_id = self.config.path_manager._generate_paper_id(
617
+ doi=doi
618
+ )
619
+ storage_path = master_dir / paper_id
620
+ storage_path.mkdir(parents=True, exist_ok=True)
621
+
622
+ # Move PDF to MASTER library
623
+ pdf_filename = f"DOI_{doi.replace('/', '_').replace(':', '_')}.pdf"
624
+ master_pdf_path = storage_path / pdf_filename
625
+ shutil.move(str(temp_pdf_path), str(master_pdf_path))
626
+
627
+ # Create/update metadata
628
+ metadata_file = storage_path / "metadata.json"
629
+ if metadata_file.exists():
630
+ with open(metadata_file, "r") as f:
631
+ metadata = json.load(f)
632
+ else:
633
+ metadata = {
634
+ "doi": doi,
635
+ "scitex_id": paper_id,
636
+ "created_at": datetime.now().isoformat(),
637
+ "created_by": "SciTeX Scholar",
638
+ }
639
+
640
+ # Update metadata with PDF info
641
+ metadata["pdf_path"] = str(
642
+ master_pdf_path.relative_to(library_dir)
643
+ )
644
+ metadata["pdf_downloaded_at"] = (
645
+ datetime.now().isoformat()
646
+ )
647
+ metadata["pdf_size_bytes"] = (
648
+ master_pdf_path.stat().st_size
649
+ )
650
+ metadata["updated_at"] = datetime.now().isoformat()
651
+
652
+ with open(metadata_file, "w") as f:
653
+ json.dump(
654
+ metadata, f, indent=2, ensure_ascii=False
655
+ )
656
+
657
+ # Update symlink using LibraryManager
658
+ if self.project not in ["master", "MASTER"]:
659
+ self._library_manager.update_symlink(
660
+ master_storage_path=storage_path,
661
+ project=self.project,
662
+ )
663
+
664
+ logger.success(
665
+ f"{self.name}: Downloaded and organized PDF for {doi}: {master_pdf_path}"
666
+ )
667
+ stats["downloaded"] += 1
668
+ else:
669
+ logger.warning(
670
+ f"{self.name}: No PDF downloaded for DOI: {doi}"
671
+ )
672
+ stats["failed"] += 1
673
+
674
+ except Exception as e:
675
+ logger.error(
676
+ f"{self.name}: Failed to organize PDF for {doi}: {e}"
677
+ )
678
+ stats["errors"] += 1
679
+ stats["failed"] += 1
680
+
681
+ return stats
682
+
683
+ finally:
684
+ # Always close browser
685
+ await self._browser_manager.close()
686
+
687
    async def _download_pdfs_sequential(
        self, dois: List[str], output_dir: Optional[Path] = None
    ) -> Dict[str, int]:
        """Sequential PDF download with authentication gateway.

        For each DOI: prepare publisher authentication, resolve candidate PDF
        URLs, download the first working candidate to a temp file, then file
        it into the MASTER library (copy + metadata.json update + project
        symlink).

        Args:
            dois: DOI strings to process, one at a time.
            output_dir: Not referenced by this implementation — downloads
                always land in the configured library.
                NOTE(review): confirm whether this parameter should be
                honored or removed.

        Returns:
            Counters: ``{"downloaded": int, "failed": int, "errors": int}``.
            A DOI whose processing raises is counted in both "errors" and
            "failed".
        """
        results = {"downloaded": 0, "failed": 0, "errors": 0}

        # Get authenticated browser context (the browser handle itself is
        # unused below; only the context is passed on)
        browser, context = (
            await self._browser_manager.get_authenticated_browser_and_context_async()
        )

        # Initialize authentication gateway (NEW)
        auth_gateway = AuthenticationGateway(
            auth_manager=self._auth_manager,
            browser_manager=self._browser_manager,
            config=self.config,
        )

        # Use simple downloader for sequential downloads
        pdf_downloader = ScholarPDFDownloader(
            context=context,
            config=self.config,
        )

        # Library layout: <library>/MASTER holds canonical paper storage,
        # <library>/<project> holds symlinks into MASTER.
        library_dir = self.config.get_library_project_dir()
        master_dir = library_dir / "MASTER"
        project_dir = library_dir / self.project
        master_dir.mkdir(parents=True, exist_ok=True)
        project_dir.mkdir(parents=True, exist_ok=True)

        for doi in dois:
            try:
                logger.info(f"{self.name}: Processing DOI: {doi}")

                # NEW: Prepare authentication context BEFORE URL finding.
                # Called for its side effect (establishes publisher-specific
                # cookies if needed); the returned value is intentionally
                # unused.
                _url_context = await auth_gateway.prepare_context_async(
                    doi=doi, context=context
                )

                # Step 1: Find URLs for the DOI (orchestration)
                urls = await self._find_urls_for_doi_async(doi, context)

                # Step 2: Get PDF URLs
                pdf_urls = urls.get("urls_pdf", [])

                if not pdf_urls:
                    logger.warning(
                        f"{self.name}: No PDF URLs found for DOI: {doi}"
                    )
                    results["failed"] += 1
                    continue

                # Step 3: Try each candidate URL until one download succeeds
                downloaded_path = None
                for pdf_entry in pdf_urls:
                    # Entries may be {"url": ...} dicts or plain strings
                    pdf_url = (
                        pdf_entry.get("url")
                        if isinstance(pdf_entry, dict)
                        else pdf_entry
                    )

                    if not pdf_url:
                        continue

                    # Download to temp location first.
                    # NOTE(review): hard-coded /tmp — consider tempfile for
                    # portability.
                    temp_output = (
                        Path("/tmp")
                        / f"{doi.replace('/', '_').replace(':', '_')}.pdf"
                    )

                    # Download PDF using simple downloader
                    result = await pdf_downloader.download_from_url(
                        pdf_url=pdf_url, output_path=temp_output
                    )

                    if result and result.exists():
                        downloaded_path = result
                        break

                if downloaded_path:
                    # Step 4: Store PDF in MASTER library with proper organization

                    # Generate unique ID from DOI using PathManager
                    paper_id = self.config.path_manager._generate_paper_id(
                        doi=doi
                    )

                    # Create MASTER storage directory
                    storage_path = master_dir / paper_id
                    storage_path.mkdir(parents=True, exist_ok=True)

                    # Try to get paper metadata to generate a readable
                    # Author-Year-Journal name.
                    # NOTE(review): readable_name is computed below but never
                    # used afterwards — the stored filename is always the
                    # DOI-based pdf_filename. Either use it or drop the
                    # enrichment block.
                    readable_name = None
                    temp_paper = None
                    try:
                        # Try to load paper from DOI to get metadata
                        from scitex.scholar.core.Paper import Paper
                        from scitex.scholar.core.Papers import Papers

                        temp_paper = Paper()
                        temp_paper.metadata.id.doi = doi
                        # Try to enrich to get author/year/journal using async method
                        temp_papers = Papers([temp_paper])
                        enriched = await self.enrich_papers_async(temp_papers)
                        if enriched and len(enriched) > 0:
                            temp_paper = enriched[0]

                        # First author's last name; single-token names used
                        # as-is
                        first_author = "Unknown"
                        authors = temp_paper.metadata.basic.authors
                        if authors and len(authors) > 0:
                            author_parts = authors[0].split()
                            if len(author_parts) > 1:
                                first_author = author_parts[-1]  # Last name
                            else:
                                first_author = author_parts[0]

                        year = temp_paper.metadata.basic.year
                        year_str = str(year) if year else "Unknown"

                        journal_clean = "Unknown"
                        journal = temp_paper.metadata.publication.journal
                        if journal:
                            # Clean journal name - keep alphanumerics, drop
                            # spaces and special chars
                            journal_clean = "".join(
                                c for c in journal if c.isalnum() or c in " "
                            ).replace(" ", "")
                            if not journal_clean:
                                journal_clean = "Unknown"

                        # Format: Author-Year-Journal
                        readable_name = (
                            f"{first_author}-{year_str}-{journal_clean}"
                        )
                    except:
                        # NOTE(review): bare except silently swallows all
                        # enrichment failures (even KeyboardInterrupt); the
                        # DOI-based fallback below is used instead.
                        pass

                    # Fallback to DOI if metadata extraction failed
                    if not readable_name:
                        readable_name = (
                            f"DOI_{doi.replace('/', '_').replace(':', '_')}"
                        )

                    # Copy PDF to MASTER storage with ORIGINAL (DOI-derived)
                    # filename to track how it was downloaded
                    pdf_filename = (
                        f"DOI_{doi.replace('/', '_').replace(':', '_')}.pdf"
                    )
                    master_pdf_path = storage_path / pdf_filename
                    shutil.copy2(downloaded_path, master_pdf_path)

                    # Load existing metadata or create minimal new metadata
                    metadata_file = storage_path / "metadata.json"
                    if metadata_file.exists():
                        # Load existing rich metadata - DO NOT OVERWRITE IT
                        with open(metadata_file, "r") as f:
                            metadata = json.load(f)
                        logger.debug(
                            f"{self.name}: Loaded existing metadata for {paper_id}"
                        )
                    else:
                        # Create new minimal metadata only if none exists
                        metadata = {
                            "doi": doi,
                            "scitex_id": paper_id,
                            "created_at": datetime.now().isoformat(),
                            "created_by": "SciTeX Scholar",
                        }

                        # Merge enriched paper metadata for new papers only,
                        # keeping the doi/scitex_id written above
                        if temp_paper:
                            # Use Pydantic to_dict() for Paper
                            paper_dict = temp_paper.to_dict()
                            for key, value in paper_dict.items():
                                if value is not None and key not in [
                                    "doi",
                                    "scitex_id",
                                ]:
                                    metadata[key] = value

                    # Record PDF location (relative to the library root) and
                    # bookkeeping timestamps
                    metadata["pdf_path"] = str(
                        master_pdf_path.relative_to(library_dir)
                    )
                    metadata["pdf_downloaded_at"] = datetime.now().isoformat()
                    metadata["pdf_size_bytes"] = master_pdf_path.stat().st_size
                    metadata["updated_at"] = datetime.now().isoformat()

                    # Save updated metadata
                    with open(metadata_file, "w") as f:
                        json.dump(metadata, f, indent=2, ensure_ascii=False)

                    # Expose the paper in the current project via symlink
                    # (MASTER itself needs no link)
                    if self.project not in ["master", "MASTER"]:
                        self._library_manager.update_symlink(
                            master_storage_path=storage_path,
                            project=self.project,
                        )

                    # Clean up temp file
                    downloaded_path.unlink()

                    logger.success(
                        f"{self.name}: Downloaded PDF for {doi}: MASTER/{paper_id}/{pdf_filename}"
                    )
                    results["downloaded"] += 1
                else:
                    logger.warning(
                        f"{self.name}: Failed to download any PDF for DOI: {doi}"
                    )
                    results["failed"] += 1

            except Exception as e:
                # Per-DOI failures are logged and counted; the loop continues
                logger.error(f"{self.name}: Failed to process {doi}: {e}")
                results["errors"] += 1
                results["failed"] += 1

        # NOTE(review): unlike the parallel variant (which closes in a
        # finally block), the browser stays open if setup above raises.
        await self._browser_manager.close()
        logger.info(f"{self.name}: PDF download complete: {results}")
        return results
910
+
911
+ def download_pdfs_from_dois(
912
+ self, dois: List[str], output_dir: Optional[Path] = None
913
+ ) -> Dict[str, int]:
914
+ """Download PDFs for given DOIs.
915
+
916
+ Args:
917
+ dois: List of DOI strings
918
+ output_dir: Output directory (uses config default if None)
919
+
920
+ Returns:
921
+ Dictionary with download statistics
922
+ """
923
+ import asyncio
924
+
925
+ return asyncio.run(
926
+ self.download_pdfs_from_dois_async(dois, output_dir)
927
+ )
928
+
929
+ def download_pdfs_from_bibtex(
930
+ self,
931
+ bibtex_input: Union[str, Path, Papers],
932
+ output_dir: Optional[Path] = None,
933
+ ) -> Dict[str, int]:
934
+ """Download PDFs from BibTeX file or Papers collection.
935
+
936
+ Args:
937
+ bibtex_input: BibTeX file path, content string, or Papers collection
938
+ output_dir: Output directory (uses config default if None)
939
+
940
+ Returns:
941
+ Dictionary with download statistics
942
+ """
943
+ # Load papers if bibtex_input is not already Papers
944
+ if isinstance(bibtex_input, Papers):
945
+ papers = bibtex_input
946
+ else:
947
+ papers = self.load_bibtex(bibtex_input)
948
+
949
+ # Extract DOIs from papers
950
+ dois = [
951
+ paper.metadata.id.doi for paper in papers if paper.metadata.id.doi
952
+ ]
953
+
954
+ if not dois:
955
+ logger.warning(
956
+ f"{self.name}: No papers with DOIs found in BibTeX input"
957
+ )
958
+ return {"downloaded": 0, "failed": 0, "errors": 0}
959
+
960
+ logger.info(
961
+ f"{self.name}: Found {len(dois)} papers with DOIs out of {len(papers)} total papers"
962
+ )
963
+
964
+ # Download PDFs using DOI method
965
+ return self.download_pdfs_from_dois(dois, output_dir)
966
+
967
+ # ----------------------------------------
968
+ # Loaders
969
+ # ----------------------------------------
970
+ def load_project(self, project: Optional[str] = None) -> Papers:
971
+ """Load papers from a project using library manager service.
972
+
973
+ Args:
974
+ project: Project name (uses self.project if None)
975
+
976
+ Returns:
977
+ Papers collection from the project
978
+ """
979
+ project_name = project or self.project
980
+ if not project_name:
981
+ raise ValueError("No project specified")
982
+
983
+ # Load papers from library by reading symlinks in project directory
984
+ from ..core.Papers import Papers
985
+ from ..core.Paper import Paper
986
+ import json
987
+
988
+ logger.info(
989
+ f"{self.name}: Loading papers from project: {project_name}"
990
+ )
991
+
992
+ library_dir = self.config.get_library_project_dir()
993
+ project_dir = library_dir / project_name
994
+
995
+ if not project_dir.exists():
996
+ logger.warning(
997
+ f"{self.name}: Project directory does not exist: {project_dir}"
998
+ )
999
+ return Papers([], project=project_name)
1000
+
1001
+ papers = []
1002
+ for item in project_dir.iterdir():
1003
+ # Skip info directory and metadata files
1004
+ if item.name in ["info", "project_metadata.json", "README.md"]:
1005
+ continue
1006
+
1007
+ # Follow symlink to MASTER directory
1008
+ if item.is_symlink():
1009
+ master_path = item.resolve()
1010
+ if master_path.exists():
1011
+ # Load metadata.json from MASTER directory
1012
+ metadata_file = master_path / "metadata.json"
1013
+ if metadata_file.exists():
1014
+ try:
1015
+ with open(metadata_file, "r") as f:
1016
+ metadata = json.load(f)
1017
+
1018
+ # Create Paper object using from_dict class method
1019
+ paper = Paper.from_dict(metadata)
1020
+
1021
+ papers.append(paper)
1022
+ except Exception as e:
1023
+ logger.warning(
1024
+ f"{self.name}: Failed to load metadata from {metadata_file}: {e}"
1025
+ )
1026
+
1027
+ logger.info(
1028
+ f"{self.name}: Loaded {len(papers)} papers from project: {project_name}"
1029
+ )
1030
+ return Papers(papers, project=project_name)
1031
+
1032
+ def load_bibtex(self, bibtex_input: Union[str, Path]) -> Papers:
1033
+ """Load Papers collection from BibTeX file or content.
1034
+
1035
+ Args:
1036
+ bibtex_input: BibTeX file path or content string
1037
+
1038
+ Returns:
1039
+ Papers collection
1040
+ """
1041
+ # Use the internal library to load papers
1042
+ papers = self._library.papers_from_bibtex(bibtex_input)
1043
+
1044
+ # Convert to Papers collection
1045
+ from .Papers import Papers
1046
+
1047
+ papers_collection = Papers(
1048
+ papers, config=self.config, project=self.project
1049
+ )
1050
+ papers_collection.library = (
1051
+ self._library
1052
+ ) # Attach library for save operations
1053
+
1054
+ return papers_collection
1055
+
1056
+ # ----------------------------------------
1057
+ # Searchers
1058
+ # ----------------------------------------
1059
+ def search_library(
1060
+ self, query: str, project: Optional[str] = None
1061
+ ) -> Papers:
1062
+ """
1063
+ Search papers in local library.
1064
+
1065
+ For new literature search (not in library), use AI2 Scholar QA:
1066
+ https://scholarqa.allen.ai/chat/ then process with:
1067
+ papers = scholar.load_bibtex('file.bib') followed by scholar.enrich(papers)
1068
+
1069
+ Args:
1070
+ query: Search query
1071
+ project: Project filter (uses self.project if None)
1072
+
1073
+ Returns:
1074
+ Papers collection matching the query
1075
+ """
1076
+ # For now, return empty Papers until search is implemented
1077
+ from ..core.Papers import Papers
1078
+
1079
+ logger.info(f"{self.name}: Searching library for: {query}")
1080
+ return Papers([], project=project or self.project)
1081
+
1082
+ def search_across_projects(
1083
+ self, query: str, projects: Optional[List[str]] = None
1084
+ ) -> Papers:
1085
+ """Search for papers across multiple projects or the entire library.
1086
+
1087
+ Args:
1088
+ query: Search query
1089
+ projects: List of project names to search (None for all)
1090
+
1091
+ Returns:
1092
+ Papers collection with search results
1093
+ """
1094
+ if projects is None:
1095
+ # Search all projects
1096
+ all_projects = [p["name"] for p in self.list_projects()]
1097
+ else:
1098
+ all_projects = projects
1099
+
1100
+ all_papers = []
1101
+ for project in all_projects:
1102
+ try:
1103
+ project_papers = Papers.from_project(project, self.config)
1104
+ # Simple text search implementation
1105
+ matching_papers = [
1106
+ p
1107
+ for p in project_papers._papers
1108
+ if query.lower() in (p.title or "").lower()
1109
+ or query.lower() in (p.abstract or "").lower()
1110
+ or any(
1111
+ query.lower() in (author or "").lower()
1112
+ for author in (p.authors or [])
1113
+ )
1114
+ ]
1115
+ all_papers.extend(matching_papers)
1116
+ except Exception as e:
1117
+ logger.debug(
1118
+ f"{self.name}: Failed to search project {project}: {e}"
1119
+ )
1120
+
1121
+ return Papers(all_papers, config=self.config, project="search_results")
1122
+
1123
+ # ----------------------------------------
1124
+ # Savers
1125
+ # ----------------------------------------
1126
+ def save_papers_to_library(self, papers: Papers) -> List[str]:
1127
+ """Save papers collection to library.
1128
+
1129
+ Args:
1130
+ papers: Papers collection to save
1131
+
1132
+ Returns:
1133
+ List of paper IDs saved
1134
+ """
1135
+ saved_ids = []
1136
+ for paper in papers:
1137
+ try:
1138
+ paper_id = self._library.save_paper(paper)
1139
+ saved_ids.append(paper_id)
1140
+ except Exception as e:
1141
+ logger.warning(f"{self.name}: Failed to save paper: {e}")
1142
+
1143
+ logger.info(
1144
+ f"{self.name}: Saved {len(saved_ids)}/{len(papers)} papers to library"
1145
+ )
1146
+ return saved_ids
1147
+
1148
+ def save_papers_as_bibtex(
1149
+ self, papers: Papers, output_path: Optional[Union[str, Path]] = None
1150
+ ) -> str:
1151
+ """Save papers to BibTeX format with enrichment metadata.
1152
+
1153
+ Args:
1154
+ papers: Papers collection to save
1155
+ output_path: Optional path to save the BibTeX file
1156
+
1157
+ Returns:
1158
+ BibTeX content as string with enrichment metadata included
1159
+ """
1160
+ from ..storage.BibTeXHandler import BibTeXHandler
1161
+
1162
+ bibtex_handler = BibTeXHandler(
1163
+ project=self.project, config=self.config
1164
+ )
1165
+ return bibtex_handler.papers_to_bibtex(papers, output_path)
1166
+
1167
+ # ----------------------------------------
1168
+ # Project Handlers
1169
+ # ----------------------------------------
1170
+ def _ensure_project_exists(
1171
+ self, project: str, description: Optional[str] = None
1172
+ ) -> Path:
1173
+ """Ensure project directory exists, create if needed (PRIVATE).
1174
+
1175
+ Args:
1176
+ project: Project name
1177
+ description: Optional project description
1178
+
1179
+ Returns:
1180
+ Path to the project directory
1181
+ """
1182
+ project_dir = self.config.get_library_project_dir() / project
1183
+ info_dir = project_dir / "info"
1184
+
1185
+ # Create project and info directories
1186
+ if not project_dir.exists():
1187
+ project_dir.mkdir(parents=True, exist_ok=True)
1188
+ logger.info(
1189
+ f"{self.name}: Auto-created project directory: {project}"
1190
+ )
1191
+
1192
+ # Ensure info directory exists
1193
+ info_dir.mkdir(parents=True, exist_ok=True)
1194
+
1195
+ # Create/move metadata file to info directory
1196
+ old_metadata_file = (
1197
+ project_dir / "project_metadata.json"
1198
+ ) # Old location
1199
+ metadata_file = info_dir / "project_metadata.json" # New location
1200
+
1201
+ # Move existing metadata file if it exists in old location
1202
+ if old_metadata_file.exists() and not metadata_file.exists():
1203
+ shutil.move(str(old_metadata_file), str(metadata_file))
1204
+ logger.info(
1205
+ f"{self.name}: Moved project metadata to info directory"
1206
+ )
1207
+
1208
+ # Create metadata file if it doesn't exist
1209
+ if not metadata_file.exists():
1210
+ metadata = {
1211
+ "name": project,
1212
+ "description": description or f"Papers for {project} project",
1213
+ "created": datetime.now().isoformat(),
1214
+ "created_by": "SciTeX Scholar",
1215
+ "auto_created": True,
1216
+ }
1217
+
1218
+ with open(metadata_file, "w") as f:
1219
+ json.dump(metadata, f, indent=2)
1220
+
1221
+ logger.info(
1222
+ f"{self.name}: Created project metadata in info directory: {project}"
1223
+ )
1224
+
1225
+ return project_dir
1226
+
1227
+ def _create_project_metadata(
1228
+ self, project: str, description: Optional[str] = None
1229
+ ) -> Path:
1230
+ """Create project directory and metadata (PRIVATE).
1231
+
1232
+ DEPRECATED: Use _ensure_project_exists instead.
1233
+
1234
+ Args:
1235
+ project: Project name
1236
+ description: Optional project description
1237
+
1238
+ Returns:
1239
+ Path to the created project directory
1240
+ """
1241
+ # Just use the new method that puts metadata in info directory
1242
+ return self._ensure_project_exists(project, description)
1243
+
1244
+ def list_projects(self) -> List[Dict[str, Any]]:
1245
+ """List all projects in the Scholar library.
1246
+
1247
+ Returns:
1248
+ List of project information dictionaries
1249
+ """
1250
+ library_dir = self.config.get_library_project_dir()
1251
+ projects = []
1252
+
1253
+ for item in library_dir.iterdir():
1254
+ if item.is_dir() and item.name != "MASTER":
1255
+ project_info = {
1256
+ "name": item.name,
1257
+ "path": str(item),
1258
+ "papers_count": len(list(item.glob("*"))),
1259
+ "created": None,
1260
+ "description": None,
1261
+ }
1262
+
1263
+ # Load metadata if exists
1264
+ metadata_file = item / "project_metadata.json"
1265
+ if metadata_file.exists():
1266
+ try:
1267
+ with open(metadata_file, "r") as f:
1268
+ metadata = json.load(f)
1269
+ project_info.update(metadata)
1270
+ except Exception as e:
1271
+ logger.debug(
1272
+ f"Failed to load metadata for {item.name}: {e}"
1273
+ )
1274
+
1275
+ projects.append(project_info)
1276
+
1277
+ return sorted(projects, key=lambda x: x["name"])
1278
+
1279
+ # ----------------------------------------
1280
+ # Library Handlers
1281
+ # ----------------------------------------
1282
+ def get_library_statistics(self) -> Dict[str, Any]:
1283
+ """Get comprehensive statistics for the entire Scholar library.
1284
+
1285
+ Returns:
1286
+ Dictionary with library-wide statistics
1287
+ """
1288
+ master_dir = self.config.get_library_master_dir()
1289
+ projects = self.list_projects()
1290
+
1291
+ stats = {
1292
+ "total_projects": len(projects),
1293
+ "total_papers": (
1294
+ len(list(master_dir.glob("*"))) if master_dir.exists() else 0
1295
+ ),
1296
+ "projects": projects,
1297
+ "library_path": str(self.config.get_library_project_dir()),
1298
+ "master_path": str(master_dir),
1299
+ }
1300
+
1301
+ # Calculate storage usage
1302
+ if master_dir.exists():
1303
+ total_size = sum(
1304
+ f.stat().st_size for f in master_dir.rglob("*") if f.is_file()
1305
+ )
1306
+ stats["storage_mb"] = total_size / (1024 * 1024)
1307
+ else:
1308
+ stats["storage_mb"] = 0
1309
+
1310
+ return stats
1311
+
1312
+ def backup_library(self, backup_path: Union[str, Path]) -> Dict[str, Any]:
1313
+ """Create a backup of the Scholar library.
1314
+
1315
+ Args:
1316
+ backup_path: Path for the backup
1317
+
1318
+ Returns:
1319
+ Dictionary with backup information
1320
+ """
1321
+ backup_path = Path(backup_path)
1322
+ library_path = self.config.get_library_project_dir()
1323
+
1324
+ if not library_path.exists():
1325
+ raise ScholarError("Library directory does not exist")
1326
+
1327
+ # Create timestamped backup
1328
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1329
+ backup_dir = backup_path / f"scholar_library_backup_{timestamp}"
1330
+
1331
+ logger.info(f"{self.name}: Creating library backup at {backup_dir}")
1332
+ shutil.copytree(library_path, backup_dir)
1333
+
1334
+ # Create backup metadata
1335
+ backup_info = {
1336
+ "timestamp": timestamp,
1337
+ "source": str(library_path),
1338
+ "backup": str(backup_dir),
1339
+ "size_mb": sum(
1340
+ f.stat().st_size for f in backup_dir.rglob("*") if f.is_file()
1341
+ )
1342
+ / (1024 * 1024),
1343
+ }
1344
+
1345
+ metadata_file = backup_dir / "backup_metadata.json"
1346
+ with open(metadata_file, "w") as f:
1347
+ json.dump(backup_info, f, indent=2)
1348
+
1349
+ logger.info(
1350
+ f"{self.name}: Library backup completed: {backup_info['size_mb']:.2f} MB"
1351
+ )
1352
+ return backup_info
1353
+
1354
+ # =========================================================================
1355
+ # PIPELINE METHODS (Phase 2)
1356
+ # =========================================================================
1357
+
1358
    async def process_paper_async(
        self,
        title: Optional[str] = None,
        doi: Optional[str] = None,
        project: Optional[str] = None,
    ) -> "Paper":
        """
        Complete sequential pipeline for processing a single paper.

        Accepts either title OR doi. Uses storage-first approach:
        each stage checks storage before processing.

        Workflow:
            Stage 0: Resolve DOI from title (if needed)
            Stage 1: Load or create Paper from storage
            Stage 2: Find PDF URLs → save to storage
            Stage 3: Download PDF → save to storage
            Stage 4: Update project symlinks

        Args:
            title: Paper title (will resolve DOI using engine)
            doi: DOI of the paper (preferred if available)
            project: Project name (uses self.project if None)

        Returns:
            Fully processed Paper object

        Raises:
            ValueError: If neither title nor doi is given, or if a DOI
                cannot be resolved from the title.

        Examples:
            # With DOI (direct)
            paper = await scholar.process_paper_async(doi="10.1038/s41598-017-02626-y")

            # With title (resolves DOI first)
            paper = await scholar.process_paper_async(
                title="Attention Is All You Need"
            )
        """
        from scitex.scholar.core.Paper import Paper

        # Validate input: at least one identifier is required.
        if not title and not doi:
            raise ValueError("Must provide either title or doi")

        project = project or self.project

        logger.info(f"{'='*60}")
        logger.info(f"Processing paper")
        if title:
            logger.info(f"Title: {title[:50]}...")
        if doi:
            logger.info(f"DOI: {doi}")
        logger.info(f"{'='*60}")

        # Stage 0: Resolve DOI from title (if needed)
        if not doi and title:
            logger.info(f"Stage 0: Resolving DOI from title...")

            # Use ScholarEngine to search and get DOI
            results = await self._scholar_engine.search_async(title=title)

            if results and results.get("id", {}).get("doi"):
                doi = results["id"]["doi"]
                logger.success(f"Resolved DOI: {doi}")
            else:
                logger.error(f"Could not resolve DOI from title: {title}")
                raise ValueError(f"Could not resolve DOI from title: {title}")

        # The DOI determines the stable paper ID and MASTER storage location.
        paper_id = self.config.path_manager._generate_paper_id(doi=doi)
        storage_path = self.config.get_library_master_dir() / paper_id

        logger.info(f"Paper ID: {paper_id}")
        logger.info(f"Storage: {storage_path}")

        # Stage 1: Load or create Paper from storage
        logger.info(f"\nStage 1: Loading/creating metadata...")
        if self._library_manager.has_metadata(paper_id):
            # Load existing from storage
            paper = self._library_manager.load_paper_from_id(paper_id)
            logger.info(f"Loaded existing metadata from storage")
        else:
            # Create new Paper
            paper = Paper()
            paper.metadata.set_doi(doi)
            paper.container.scitex_id = paper_id

            # If we have title, save it
            if title:
                paper.metadata.basic.title = title

            # Create storage and save
            self._library_manager.save_paper_incremental(paper_id, paper)
            logger.success(f"Created new paper entry in storage")

        # Stage 2: Check/find URLs (skipped if storage already has them)
        logger.info(f"\nStage 2: Checking/finding PDF URLs...")
        if not self._library_manager.has_urls(paper_id):
            logger.info(f"Finding PDF URLs for DOI: {doi}")
            browser, context = (
                await self._browser_manager.get_authenticated_browser_and_context_async()
            )
            try:
                url_finder = ScholarURLFinder(context, config=self.config)
                urls = await url_finder.find_pdf_urls(doi)

                paper.metadata.url.pdfs = urls
                self._library_manager.save_paper_incremental(paper_id, paper)
                logger.success(f"Found {len(urls)} PDF URLs, saved to storage")
            finally:
                # Browser is always released, even on failure.
                await self._browser_manager.close()
        else:
            # NOTE(review): assumes the loaded paper object carries the
            # stored URLs whenever has_urls() is true — confirm that
            # load_paper_from_id() populates metadata.url.pdfs.
            logger.info(
                f"PDF URLs already in storage ({len(paper.metadata.url.pdfs)} URLs)"
            )

        # Stage 3: Check/download PDF (first URL only; no retry over the
        # remaining candidates)
        logger.info(f"\nStage 3: Checking/downloading PDF...")
        if not self._library_manager.has_pdf(paper_id):
            logger.info(f"Downloading PDF...")
            if paper.metadata.url.pdfs:
                browser, context = (
                    await self._browser_manager.get_authenticated_browser_and_context_async()
                )
                try:
                    downloader = ScholarPDFDownloader(
                        context, config=self.config
                    )

                    # URL entries may be {"url": ...} dicts or plain strings
                    pdf_url = (
                        paper.metadata.url.pdfs[0]["url"]
                        if isinstance(paper.metadata.url.pdfs[0], dict)
                        else paper.metadata.url.pdfs[0]
                    )
                    temp_path = storage_path / "main.pdf"

                    result = await downloader.download_from_url(
                        pdf_url, temp_path, doi=doi
                    )
                    if result and result.exists():
                        paper.metadata.path.pdfs.append(str(result))
                        self._library_manager.save_paper_incremental(
                            paper_id, paper
                        )
                        logger.success(
                            f"{self.name}: Downloaded PDF, saved to storage"
                        )
                    else:
                        logger.warning(f"{self.name}: Failed to download PDF")
                finally:
                    await self._browser_manager.close()
            else:
                logger.warning(
                    f"{self.name}: No PDF URLs available for download"
                )
        else:
            logger.info(f"{self.name}: PDF already in storage")

        # Stage 4: Update project symlinks (MASTER itself needs no link)
        if project and project not in ["master", "MASTER"]:
            logger.info(
                f"{self.name}: \nStage 4: Updating project symlinks..."
            )
            self._library_manager.update_symlink(
                master_storage_path=storage_path,
                project=project,
            )
            logger.success(
                f"{self.name}: Updated symlink in project: {project}"
            )

        logger.info(f"\n{'='*60}")
        logger.success(f"{self.name}: Paper processing complete")
        logger.info(f"{'='*60}\n")

        return paper
1532
+
1533
+ def process_paper(
1534
+ self,
1535
+ title: Optional[str] = None,
1536
+ doi: Optional[str] = None,
1537
+ project: Optional[str] = None,
1538
+ ) -> "Paper":
1539
+ """
1540
+ Synchronous wrapper for process_paper_async.
1541
+
1542
+ See process_paper_async() for full documentation.
1543
+ """
1544
+ return asyncio.run(
1545
+ self.process_paper_async(title=title, doi=doi, project=project)
1546
+ )
1547
+
1548
+ # =========================================================================
1549
+ # PIPELINE METHODS (Phase 3) - Parallel Papers Processing
1550
+ # =========================================================================
1551
+
1552
    async def process_papers_async(
        self,
        papers: Union["Papers", List[str]],
        project: Optional[str] = None,
        max_concurrent: int = 3,
    ) -> "Papers":
        """
        Process multiple papers with controlled parallelism.

        Each paper goes through complete sequential pipeline.
        Semaphore controls how many papers process concurrently.

        Architecture:
            - Parallel papers (max_concurrent at a time)
            - Sequential stages per paper
            - Storage checks before each stage

        Args:
            papers: Papers collection or list of DOIs
            project: Project name (uses self.project if None)
            max_concurrent: Maximum concurrent papers (default: 3)
                Set to 1 for purely sequential processing

        Returns:
            Papers collection with processed papers (failed papers are
            omitted from the result)

        Examples:
            # Process Papers collection (parallel)
            papers = scholar.load_bibtex("papers.bib")
            processed = await scholar.process_papers_async(papers, max_concurrent=3)

            # Process DOI list (sequential)
            dois = ["10.1038/...", "10.1016/...", "10.1109/..."]
            processed = await scholar.process_papers_async(dois, max_concurrent=1)
        """
        from scitex.scholar.core.Papers import Papers

        project = project or self.project

        # Convert a plain list of DOI strings to a Papers collection so the
        # rest of the method only deals with one input shape.
        if isinstance(papers, list):
            papers_list = []
            for doi in papers:
                from scitex.scholar.core.Paper import Paper

                p = Paper()
                p.metadata.set_doi(doi)
                papers_list.append(p)
            papers = Papers(papers_list, project=project, config=self.config)

        total = len(papers)
        logger.info(f"{self.name}: \n{'='*60}")
        logger.info(
            f"{self.name}: Processing {total} papers (max_concurrent={max_concurrent})"
        )
        logger.info(f"{self.name}: Project: {project}")
        logger.info(f"{self.name}: {'='*60}\n")

        # Semaphore bounds how many papers run the pipeline at once.
        semaphore = asyncio.Semaphore(max_concurrent)

        async def process_with_semaphore(paper, index):
            """Process one paper with semaphore control."""
            async with semaphore:
                logger.info(
                    f"{self.name}: \n[{index}/{total}] Starting paper..."
                )
                try:
                    result = await self.process_paper_async(
                        title=paper.metadata.basic.title,
                        doi=paper.metadata.id.doi,
                        project=project,
                    )
                    logger.success(f"{self.name}: [{index}/{total}] Completed")
                    return result
                except Exception as e:
                    # Per-paper failures return None so one bad paper does
                    # not abort the batch.
                    logger.error(f"{self.name}: [{index}/{total}] Failed: {e}")
                    return None

        # Create tasks for all papers (1-based index for log messages)
        tasks = [
            process_with_semaphore(paper, i + 1)
            for i, paper in enumerate(papers)
        ]

        # return_exceptions=True keeps gather from cancelling siblings when
        # one task raises outside the per-paper try/except.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep only successfully processed papers; count raised exceptions.
        processed_papers = []
        errors = 0
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                logger.error(
                    f"{self.name}: Paper {i+1} raised exception: {result}"
                )
                errors += 1
            elif result is not None:
                processed_papers.append(result)

        # Summary
        logger.info(f"{self.name}: \n{'='*60}")
        logger.info(f"{self.name}: Batch Processing Complete")
        logger.info(f"{self.name}: Total: {total}")
        logger.info(f"{self.name}: Successful: {len(processed_papers)}")
        logger.info(f"{self.name}: Failed: {total - len(processed_papers)}")
        logger.info(f"{self.name}: Errors: {errors}")
        logger.info(f"{self.name}: {'='*60}\n")

        return Papers(processed_papers, project=project, config=self.config)
1663
+
1664
+ def process_papers(
1665
+ self,
1666
+ papers: Union["Papers", List[str]],
1667
+ project: Optional[str] = None,
1668
+ max_concurrent: int = 3,
1669
+ ) -> "Papers":
1670
+ """
1671
+ Synchronous wrapper for process_papers_async.
1672
+
1673
+ See process_papers_async() for full documentation.
1674
+ """
1675
+ return asyncio.run(
1676
+ self.process_papers_async(
1677
+ papers=papers,
1678
+ project=project,
1679
+ max_concurrent=max_concurrent,
1680
+ )
1681
+ )
1682
+
1683
+ # =========================================================================
1684
+ # INTERNAL SERVICES (PRIVATE - users should not access these directly)
1685
+ # =========================================================================
1686
+ def _init_config(self, config):
1687
+ # Handle different config input types
1688
+ if config is None:
1689
+ return ScholarConfig.load() # Auto-detect config
1690
+ elif isinstance(config, (str, Path)):
1691
+ return ScholarConfig.from_yaml(config)
1692
+ elif isinstance(config, ScholarConfig):
1693
+ return config
1694
+ else:
1695
+ raise TypeError(f"Invalid config type: {type(config)}")
1696
+
1697
+ @property
1698
+ def _scholar_engine(self) -> ScholarEngine:
1699
+ """Get Scholar engine for search and enrichment (PRIVATE)."""
1700
+ if (
1701
+ not hasattr(self, "__scholar_engine")
1702
+ or self.__scholar_engine is None
1703
+ ):
1704
+ self.__scholar_engine = ScholarEngine(config=self.config)
1705
+ return self.__scholar_engine
1706
+
1707
+ @property
1708
+ def _auth_manager(self) -> ScholarAuthManager:
1709
+ """Get authentication manager service (PRIVATE)."""
1710
+ if not hasattr(self, "__auth_manager") or self.__auth_manager is None:
1711
+ self.__auth_manager = ScholarAuthManager()
1712
+ return self.__auth_manager
1713
+
1714
+ @property
1715
+ def _browser_manager(self) -> ScholarBrowserManager:
1716
+ """Get browser manager service (PRIVATE)."""
1717
+ if (
1718
+ not hasattr(self, "__browser_manager")
1719
+ or self.__browser_manager is None
1720
+ ):
1721
+ self.__browser_manager = ScholarBrowserManager(
1722
+ auth_manager=self._auth_manager,
1723
+ chrome_profile_name="system",
1724
+ browser_mode=self.browser_mode,
1725
+ )
1726
+ return self.__browser_manager
1727
+
1728
+ @property
1729
+ def _library_manager(self) -> LibraryManager:
1730
+ """Get library manager service - low-level operations (PRIVATE)."""
1731
+ if (
1732
+ not hasattr(self, "__library_manager")
1733
+ or self.__library_manager is None
1734
+ ):
1735
+ self.__library_manager = LibraryManager(
1736
+ project=self.project, config=self.config
1737
+ )
1738
+ return self.__library_manager
1739
+
1740
+ @property
1741
+ def _library(self) -> ScholarLibrary:
1742
+ """Get Scholar library service - high-level operations (PRIVATE)."""
1743
+ if not hasattr(self, "__library") or self.__library is None:
1744
+ self.__library = ScholarLibrary(
1745
+ project=self.project, config=self.config
1746
+ )
1747
+ return self.__library
1748
+
1749
+
1750
# Export all classes and functions
# Only the Scholar facade is part of this module's public API; the manager
# and engine services it wraps stay private.
__all__ = ["Scholar"]
1752
+
1753
if __name__ == "__main__":
    from scitex.scholar.core.Paper import Paper
    from scitex.scholar.core.Papers import Papers

    def main():
        """Demonstrate Scholar class usage - Clean API Demo."""
        print("\n" + "=" * 60)
        print("🎓 Scholar Module Demo - Clean API")
        print("=" * 60 + "\n")

        # ----------------------------------------
        # 1. Initialize Scholar
        # ----------------------------------------
        print("1️⃣ Initialize Scholar")
        print("-" * 60)
        scholar = Scholar(
            project="demo_project",
            project_description="Demo project for testing Scholar API",
        )
        # NOTE: dropped needless ``f`` prefixes on placeholder-free
        # literals throughout; output is unchanged.
        print("✓ Scholar initialized")
        print(f" Project: {scholar.project}")
        print(f" Workspace: {scholar.get_workspace_dir()}")
        print()

        # Demonstrate project management
        print("2. Project Management:")
        try:
            # Create a new project
            project_dir = scholar._create_project_metadata(
                "neural_networks_2024",
                description="Collection of neural network papers from 2024",
            )
            print(" ✅ Created project: neural_networks_2024")
            print(f" 📂 Project directory: {project_dir}")

            # List all projects
            projects = scholar.list_projects()
            print(f" 📋 Total projects in library: {len(projects)}")
            for project in projects[:3]:  # Show first 3
                print(
                    f" - {project['name']}: {project.get('description', 'No description')}"
                )
            if len(projects) > 3:
                print(f" ... and {len(projects) - 3} more")

        except Exception as e:
            print(f" ⚠️ Project management demo skipped: {e}")
        print()

        # Demonstrate library statistics
        print("3. Library Statistics:")
        try:
            stats = scholar.get_library_statistics()
            print(f" 📊 Total projects: {stats['total_projects']}")
            print(f" 📚 Total papers: {stats['total_papers']}")
            print(f" 💾 Storage usage: {stats['storage_mb']:.2f} MB")
            print(f" 📁 Library path: {stats['library_path']}")

        except Exception as e:
            print(f" ⚠️ Library statistics demo skipped: {e}")
        print()

        # Demonstrate paper and project operations
        print("4. Working with Papers:")

        # Create some sample papers with Pydantic structure
        p1 = Paper()
        p1.metadata.basic.title = (
            "Vision Transformer: An Image Is Worth 16x16 Words"
        )
        p1.metadata.basic.authors = ["Dosovitskiy, Alexey", "Beyer, Lucas"]
        p1.metadata.basic.year = 2021
        p1.metadata.basic.keywords = [
            "vision transformer",
            "computer vision",
            "attention",
        ]
        p1.metadata.publication.journal = "ICLR"
        p1.metadata.set_doi("10.48550/arXiv.2010.11929")
        p1.container.projects = ["neural_networks_2024"]

        p2 = Paper()
        p2.metadata.basic.title = "Scaling Laws for Neural Language Models"
        p2.metadata.basic.authors = ["Kaplan, Jared", "McCandlish, Sam"]
        p2.metadata.basic.year = 2020
        p2.metadata.basic.keywords = ["scaling laws", "language models", "GPT"]
        p2.metadata.publication.journal = "arXiv preprint"
        p2.metadata.set_doi("10.48550/arXiv.2001.08361")
        p2.container.projects = ["neural_networks_2024"]

        sample_papers = [p1, p2]

        # Create Papers collection
        papers = Papers(
            sample_papers,
            project="neural_networks_2024",
            config=scholar.config,
        )
        print(f" 📝 Created collection with {len(papers)} papers")

        # Use Scholar to work with the collection
        # Switch project by creating new instance (cleaner pattern)
        scholar = Scholar(project="neural_networks_2024")
        print(f" 🎯 Set Scholar project to: {scholar.project}")
        print()

        # Demonstrate DOI resolution workflow
        print("5. Scholar Workflow Integration:")
        try:
            # Create a sample BibTeX content for demonstration
            sample_bibtex = """
            @article{sample2024,
                title = {Sample Paper for Demo},
                author = {Demo, Author},
                year = {2024},
                journal = {Demo Journal}
            }
            """

            # Demonstrate BibTeX loading
            papers_from_bibtex = scholar.load_bibtex(sample_bibtex.strip())
            print(f" 📄 Loaded {len(papers_from_bibtex)} papers from BibTeX")

            # Demonstrate project loading
            if scholar.project:
                try:
                    project_papers = scholar.load_project()
                    print(
                        f" 📂 Loaded {len(project_papers)} papers from current project"
                    )
                # BUG FIX: was a bare ``except:`` which would also swallow
                # KeyboardInterrupt/SystemExit; narrowed to Exception.
                except Exception:
                    print(
                        " 📂 Current project is empty or doesn't exist yet"
                    )

        except Exception as e:
            print(f" ⚠️ Workflow demo partially skipped: {e}")
        print()

        # Demonstrate search capabilities
        print("6. Search Capabilities:")
        try:
            # Search across projects
            search_results = scholar.search_across_projects("transformer")
            print(
                f" 🔍 Search for 'transformer': {len(search_results)} results across all projects"
            )

            # Search in current library (existing papers)
            library_search = scholar.search_library("vision")
            print(
                f" 🔍 Library search for 'vision': {len(library_search)} results"
            )

        except Exception as e:
            print(f" ⚠️ Search demo skipped: {e}")
        print()

        # Demonstrate configuration access
        print("7. Configuration Management:")
        print(f" ⚙️ Scholar directory: {scholar.config.paths.scholar_dir}")
        print(
            f" ⚙️ Library directory: {scholar.config.get_library_project_dir()}"
        )
        print(
            f" ⚙️ Debug mode: {scholar.config.resolve('debug_mode', default=False)}"
        )
        print()

        # Demonstrate service access (internal components)
        print("8. Service Components (Internal):")
        print(
            f" 🔧 Scholar Engine: {type(scholar._scholar_engine).__name__}"
        )
        print(f" 🔧 Auth Manager: {type(scholar._auth_manager).__name__}")
        print(
            f" 🔧 Browser Manager: {type(scholar._browser_manager).__name__}"
        )
        print(
            f" 🔧 Library Manager: {type(scholar._library_manager).__name__}"
        )
        print()

        # Demonstrate backup capabilities
        print("9. Backup and Maintenance:")
        try:
            # BUG FIX: removed unused ``import os``.
            import tempfile

            # Create a temporary backup location
            backup_dir = Path(tempfile.mkdtemp()) / "scholar_backup"
            backup_info = scholar.backup_library(backup_dir)
            print(" 💾 Library backup created:")
            print(f" 📁 Location: {backup_info['backup']}")
            print(f" 📊 Size: {backup_info['size_mb']:.2f} MB")
            print(f" 🕐 Timestamp: {backup_info['timestamp']}")

            # Clean up
            import shutil

            shutil.rmtree(backup_dir, ignore_errors=True)

        except Exception as e:
            print(f" ⚠️ Backup demo skipped: {e}")
        print()

        print("Scholar global management demo completed! ✨")
        print()
        print("💡 Key Scholar Capabilities:")
        print(" • Global library management and statistics")
        print(" • Project creation and organization")
        print(" • Cross-project search and analysis")
        print(" • Integration with Paper and Papers classes")
        print(" • DOI resolution and metadata enrichment")
        print(" • PDF download and browser automation")
        print(" • Backup and maintenance operations")
        print()

    main()
1972
+
1973
+ # python -m scitex.scholar.core.Scholar
1974
+
1975
+ # EOF