scitex 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1091):
  1. scitex/__init__.py +19 -8
  2. scitex/__main__.py +2 -1
  3. scitex/__version__.py +1 -1
  4. scitex/_optional_deps.py +13 -20
  5. scitex/ai/__init__.py +5 -0
  6. scitex/ai/_gen_ai/_Anthropic.py +3 -1
  7. scitex/ai/_gen_ai/_BaseGenAI.py +3 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +1 -1
  9. scitex/ai/_gen_ai/_Google.py +3 -2
  10. scitex/ai/_gen_ai/_Llama.py +4 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +3 -1
  12. scitex/ai/_gen_ai/_PARAMS.py +1 -0
  13. scitex/ai/_gen_ai/_Perplexity.py +3 -1
  14. scitex/ai/_gen_ai/__init__.py +1 -0
  15. scitex/ai/_gen_ai/_format_output_func.py +3 -1
  16. scitex/ai/classification/CrossValidationExperiment.py +8 -14
  17. scitex/ai/classification/examples/timeseries_cv_demo.py +128 -112
  18. scitex/ai/classification/reporters/_BaseClassificationReporter.py +2 -0
  19. scitex/ai/classification/reporters/_ClassificationReporter.py +30 -45
  20. scitex/ai/classification/reporters/_MultiClassificationReporter.py +8 -11
  21. scitex/ai/classification/reporters/_SingleClassificationReporter.py +126 -182
  22. scitex/ai/classification/reporters/__init__.py +1 -1
  23. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +213 -119
  24. scitex/ai/classification/reporters/reporter_utils/__init__.py +28 -36
  25. scitex/ai/classification/reporters/reporter_utils/aggregation.py +125 -143
  26. scitex/ai/classification/reporters/reporter_utils/data_models.py +128 -120
  27. scitex/ai/classification/reporters/reporter_utils/reporting.py +507 -340
  28. scitex/ai/classification/reporters/reporter_utils/storage.py +4 -1
  29. scitex/ai/classification/reporters/reporter_utils/validation.py +141 -154
  30. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +204 -129
  31. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +215 -171
  32. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +17 -17
  33. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +67 -143
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +67 -143
  35. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +12 -13
  36. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +231 -144
  37. scitex/ai/classification/timeseries/__init__.py +2 -4
  38. scitex/ai/classification/timeseries/_normalize_timestamp.py +3 -0
  39. scitex/ai/clustering/_pca.py +0 -1
  40. scitex/ai/clustering/_umap.py +1 -2
  41. scitex/ai/feature_extraction/__init__.py +10 -8
  42. scitex/ai/feature_extraction/vit.py +0 -1
  43. scitex/ai/feature_selection/feature_selection.py +3 -8
  44. scitex/ai/metrics/_calc_conf_mat.py +2 -0
  45. scitex/ai/metrics/_calc_feature_importance.py +3 -7
  46. scitex/ai/metrics/_calc_pre_rec_auc.py +5 -5
  47. scitex/ai/metrics/_calc_roc_auc.py +4 -2
  48. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +35 -20
  49. scitex/ai/metrics/_calc_silhouette_score.py +1 -3
  50. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +0 -3
  51. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +0 -3
  52. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +0 -3
  53. scitex/ai/optim/_optimizers.py +1 -1
  54. scitex/ai/plt/__init__.py +6 -1
  55. scitex/ai/plt/_plot_feature_importance.py +1 -3
  56. scitex/ai/plt/_plot_learning_curve.py +9 -24
  57. scitex/ai/plt/_plot_optuna_study.py +4 -3
  58. scitex/ai/plt/_plot_pre_rec_curve.py +9 -15
  59. scitex/ai/plt/_plot_roc_curve.py +6 -8
  60. scitex/ai/plt/_stx_conf_mat.py +121 -122
  61. scitex/ai/sampling/undersample.py +3 -2
  62. scitex/ai/sklearn/__init__.py +2 -2
  63. scitex/ai/training/_LearningCurveLogger.py +23 -10
  64. scitex/ai/utils/_check_params.py +0 -1
  65. scitex/benchmark/__init__.py +15 -25
  66. scitex/benchmark/benchmark.py +124 -117
  67. scitex/benchmark/monitor.py +117 -107
  68. scitex/benchmark/profiler.py +61 -58
  69. scitex/bridge/__init__.py +110 -0
  70. scitex/bridge/_helpers.py +149 -0
  71. scitex/bridge/_plt_vis.py +529 -0
  72. scitex/bridge/_protocol.py +283 -0
  73. scitex/bridge/_stats_plt.py +261 -0
  74. scitex/bridge/_stats_vis.py +265 -0
  75. scitex/browser/__init__.py +0 -2
  76. scitex/browser/auth/__init__.py +0 -0
  77. scitex/browser/auth/google.py +16 -11
  78. scitex/browser/automation/CookieHandler.py +2 -3
  79. scitex/browser/collaboration/__init__.py +3 -0
  80. scitex/browser/collaboration/auth_helpers.py +3 -1
  81. scitex/browser/collaboration/collaborative_agent.py +2 -0
  82. scitex/browser/collaboration/interactive_panel.py +2 -2
  83. scitex/browser/collaboration/shared_session.py +20 -11
  84. scitex/browser/collaboration/standard_interactions.py +1 -0
  85. scitex/browser/core/BrowserMixin.py +12 -30
  86. scitex/browser/core/ChromeProfileManager.py +9 -24
  87. scitex/browser/debugging/_browser_logger.py +15 -25
  88. scitex/browser/debugging/_failure_capture.py +9 -2
  89. scitex/browser/debugging/_highlight_element.py +15 -6
  90. scitex/browser/debugging/_show_grid.py +5 -6
  91. scitex/browser/debugging/_sync_session.py +4 -3
  92. scitex/browser/debugging/_test_monitor.py +14 -5
  93. scitex/browser/debugging/_visual_cursor.py +46 -35
  94. scitex/browser/interaction/click_center.py +4 -3
  95. scitex/browser/interaction/click_with_fallbacks.py +7 -10
  96. scitex/browser/interaction/close_popups.py +79 -66
  97. scitex/browser/interaction/fill_with_fallbacks.py +8 -8
  98. scitex/browser/pdf/__init__.py +3 -1
  99. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +11 -10
  100. scitex/browser/pdf/detect_chrome_pdf_viewer.py +3 -6
  101. scitex/browser/remote/CaptchaHandler.py +109 -96
  102. scitex/browser/remote/ZenRowsAPIClient.py +91 -97
  103. scitex/browser/remote/ZenRowsBrowserManager.py +138 -112
  104. scitex/browser/stealth/HumanBehavior.py +4 -9
  105. scitex/browser/stealth/StealthManager.py +11 -26
  106. scitex/capture/__init__.py +17 -17
  107. scitex/capture/__main__.py +2 -3
  108. scitex/capture/capture.py +23 -51
  109. scitex/capture/cli.py +14 -39
  110. scitex/capture/gif.py +5 -9
  111. scitex/capture/mcp_server.py +7 -20
  112. scitex/capture/session.py +4 -3
  113. scitex/capture/utils.py +18 -53
  114. scitex/cli/__init__.py +1 -1
  115. scitex/cli/cloud.py +158 -116
  116. scitex/cli/config.py +224 -0
  117. scitex/cli/main.py +41 -40
  118. scitex/cli/scholar.py +60 -27
  119. scitex/cli/security.py +14 -20
  120. scitex/cli/web.py +87 -90
  121. scitex/cli/writer.py +51 -45
  122. scitex/cloud/__init__.py +14 -11
  123. scitex/cloud/_matplotlib_hook.py +6 -6
  124. scitex/config/README.md +313 -0
  125. scitex/config/{PriorityConfig.py → _PriorityConfig.py} +114 -17
  126. scitex/config/_ScitexConfig.py +319 -0
  127. scitex/config/__init__.py +41 -9
  128. scitex/config/_paths.py +325 -0
  129. scitex/config/default.yaml +81 -0
  130. scitex/context/_suppress_output.py +2 -3
  131. scitex/db/_BaseMixins/_BaseBackupMixin.py +3 -1
  132. scitex/db/_BaseMixins/_BaseBatchMixin.py +3 -1
  133. scitex/db/_BaseMixins/_BaseBlobMixin.py +3 -1
  134. scitex/db/_BaseMixins/_BaseImportExportMixin.py +1 -3
  135. scitex/db/_BaseMixins/_BaseIndexMixin.py +3 -1
  136. scitex/db/_BaseMixins/_BaseMaintenanceMixin.py +1 -3
  137. scitex/db/_BaseMixins/_BaseQueryMixin.py +3 -1
  138. scitex/db/_BaseMixins/_BaseRowMixin.py +3 -1
  139. scitex/db/_BaseMixins/_BaseTableMixin.py +3 -1
  140. scitex/db/_BaseMixins/_BaseTransactionMixin.py +1 -3
  141. scitex/db/_BaseMixins/__init__.py +1 -1
  142. scitex/db/__init__.py +9 -1
  143. scitex/db/__main__.py +8 -21
  144. scitex/db/_check_health.py +15 -31
  145. scitex/db/_delete_duplicates.py +7 -4
  146. scitex/db/_inspect.py +22 -38
  147. scitex/db/_inspect_optimized.py +89 -85
  148. scitex/db/_postgresql/_PostgreSQL.py +0 -1
  149. scitex/db/_postgresql/_PostgreSQLMixins/_BlobMixin.py +3 -1
  150. scitex/db/_postgresql/_PostgreSQLMixins/_ConnectionMixin.py +1 -3
  151. scitex/db/_postgresql/_PostgreSQLMixins/_ImportExportMixin.py +1 -3
  152. scitex/db/_postgresql/_PostgreSQLMixins/_MaintenanceMixin.py +1 -4
  153. scitex/db/_postgresql/_PostgreSQLMixins/_QueryMixin.py +3 -3
  154. scitex/db/_postgresql/_PostgreSQLMixins/_RowMixin.py +3 -1
  155. scitex/db/_postgresql/_PostgreSQLMixins/_TransactionMixin.py +1 -3
  156. scitex/db/_postgresql/__init__.py +1 -1
  157. scitex/db/_sqlite3/_SQLite3.py +2 -4
  158. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +11 -12
  159. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +19 -14
  160. scitex/db/_sqlite3/_SQLite3Mixins/_BatchMixin.py +3 -1
  161. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +7 -7
  162. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +118 -111
  163. scitex/db/_sqlite3/_SQLite3Mixins/_ConnectionMixin.py +8 -10
  164. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +17 -45
  165. scitex/db/_sqlite3/_SQLite3Mixins/_ImportExportMixin.py +1 -3
  166. scitex/db/_sqlite3/_SQLite3Mixins/_IndexMixin.py +3 -1
  167. scitex/db/_sqlite3/_SQLite3Mixins/_QueryMixin.py +3 -4
  168. scitex/db/_sqlite3/_SQLite3Mixins/_RowMixin.py +9 -9
  169. scitex/db/_sqlite3/_SQLite3Mixins/_TableMixin.py +18 -11
  170. scitex/db/_sqlite3/_SQLite3Mixins/__init__.py +1 -0
  171. scitex/db/_sqlite3/__init__.py +1 -1
  172. scitex/db/_sqlite3/_delete_duplicates.py +13 -11
  173. scitex/decorators/__init__.py +29 -4
  174. scitex/decorators/_auto_order.py +43 -43
  175. scitex/decorators/_batch_fn.py +12 -6
  176. scitex/decorators/_cache_disk.py +8 -9
  177. scitex/decorators/_cache_disk_async.py +8 -7
  178. scitex/decorators/_combined.py +19 -13
  179. scitex/decorators/_converters.py +16 -3
  180. scitex/decorators/_deprecated.py +32 -22
  181. scitex/decorators/_numpy_fn.py +18 -4
  182. scitex/decorators/_pandas_fn.py +17 -5
  183. scitex/decorators/_signal_fn.py +17 -3
  184. scitex/decorators/_torch_fn.py +32 -15
  185. scitex/decorators/_xarray_fn.py +23 -9
  186. scitex/dev/_analyze_code_flow.py +0 -2
  187. scitex/dict/_DotDict.py +15 -19
  188. scitex/dict/_flatten.py +1 -0
  189. scitex/dict/_listed_dict.py +1 -0
  190. scitex/dict/_pop_keys.py +1 -0
  191. scitex/dict/_replace.py +1 -0
  192. scitex/dict/_safe_merge.py +1 -0
  193. scitex/dict/_to_str.py +2 -3
  194. scitex/dsp/__init__.py +13 -4
  195. scitex/dsp/_crop.py +3 -1
  196. scitex/dsp/_detect_ripples.py +3 -1
  197. scitex/dsp/_modulation_index.py +3 -1
  198. scitex/dsp/_time.py +3 -1
  199. scitex/dsp/_wavelet.py +0 -1
  200. scitex/dsp/example.py +0 -5
  201. scitex/dsp/filt.py +4 -0
  202. scitex/dsp/utils/__init__.py +4 -1
  203. scitex/dsp/utils/pac.py +3 -3
  204. scitex/dt/_normalize_timestamp.py +4 -1
  205. scitex/errors.py +3 -6
  206. scitex/etc/__init__.py +1 -1
  207. scitex/gen/_DimHandler.py +6 -6
  208. scitex/gen/__init__.py +5 -1
  209. scitex/gen/_deprecated_close.py +1 -0
  210. scitex/gen/_deprecated_start.py +5 -3
  211. scitex/gen/_detect_environment.py +44 -41
  212. scitex/gen/_detect_notebook_path.py +51 -47
  213. scitex/gen/_embed.py +1 -1
  214. scitex/gen/_get_notebook_path.py +81 -62
  215. scitex/gen/_inspect_module.py +0 -1
  216. scitex/gen/_norm.py +16 -7
  217. scitex/gen/_norm_cache.py +78 -65
  218. scitex/gen/_print_config.py +0 -3
  219. scitex/gen/_src.py +2 -3
  220. scitex/gen/_title_case.py +3 -2
  221. scitex/gen/_to_even.py +8 -8
  222. scitex/gen/_transpose.py +3 -3
  223. scitex/gen/misc.py +0 -3
  224. scitex/gists/_SigMacro_processFigure_S.py +2 -2
  225. scitex/gists/_SigMacro_toBlue.py +2 -2
  226. scitex/gists/__init__.py +4 -1
  227. scitex/git/_branch.py +19 -11
  228. scitex/git/_clone.py +23 -15
  229. scitex/git/_commit.py +10 -12
  230. scitex/git/_init.py +15 -38
  231. scitex/git/_remote.py +9 -3
  232. scitex/git/_result.py +3 -0
  233. scitex/git/_retry.py +2 -5
  234. scitex/git/_types.py +4 -0
  235. scitex/git/_validation.py +8 -8
  236. scitex/git/_workflow.py +4 -4
  237. scitex/io/__init__.py +2 -1
  238. scitex/io/_glob.py +2 -2
  239. scitex/io/_json2md.py +3 -3
  240. scitex/io/_load.py +6 -8
  241. scitex/io/_load_cache.py +71 -71
  242. scitex/io/_load_configs.py +2 -3
  243. scitex/io/_load_modules/_H5Explorer.py +6 -12
  244. scitex/io/_load_modules/_ZarrExplorer.py +3 -3
  245. scitex/io/_load_modules/_bibtex.py +62 -63
  246. scitex/io/_load_modules/_canvas.py +4 -9
  247. scitex/io/_load_modules/_catboost.py +7 -2
  248. scitex/io/_load_modules/_hdf5.py +2 -0
  249. scitex/io/_load_modules/_image.py +5 -1
  250. scitex/io/_load_modules/_matlab.py +3 -1
  251. scitex/io/_load_modules/_optuna.py +0 -1
  252. scitex/io/_load_modules/_pdf.py +38 -29
  253. scitex/io/_load_modules/_sqlite3.py +1 -0
  254. scitex/io/_load_modules/_txt.py +2 -0
  255. scitex/io/_load_modules/_xml.py +9 -9
  256. scitex/io/_load_modules/_zarr.py +12 -10
  257. scitex/io/_metadata.py +76 -37
  258. scitex/io/_qr_utils.py +18 -13
  259. scitex/io/_save.py +220 -63
  260. scitex/io/_save_modules/__init__.py +7 -2
  261. scitex/io/_save_modules/_bibtex.py +66 -61
  262. scitex/io/_save_modules/_canvas.py +5 -6
  263. scitex/io/_save_modules/_catboost.py +2 -2
  264. scitex/io/_save_modules/_csv.py +4 -4
  265. scitex/io/_save_modules/_excel.py +5 -9
  266. scitex/io/_save_modules/_hdf5.py +9 -21
  267. scitex/io/_save_modules/_html.py +5 -5
  268. scitex/io/_save_modules/_image.py +105 -8
  269. scitex/io/_save_modules/_joblib.py +2 -2
  270. scitex/io/_save_modules/_json.py +51 -6
  271. scitex/io/_save_modules/_listed_dfs_as_csv.py +2 -1
  272. scitex/io/_save_modules/_listed_scalars_as_csv.py +2 -1
  273. scitex/io/_save_modules/_matlab.py +2 -2
  274. scitex/io/_save_modules/_numpy.py +6 -8
  275. scitex/io/_save_modules/_pickle.py +4 -4
  276. scitex/io/_save_modules/_plotly.py +3 -3
  277. scitex/io/_save_modules/_tex.py +23 -25
  278. scitex/io/_save_modules/_text.py +2 -2
  279. scitex/io/_save_modules/_yaml.py +9 -9
  280. scitex/io/_save_modules/_zarr.py +15 -15
  281. scitex/io/utils/__init__.py +2 -1
  282. scitex/io/utils/h5_to_zarr.py +173 -155
  283. scitex/linalg/__init__.py +1 -1
  284. scitex/linalg/_geometric_median.py +4 -3
  285. scitex/logging/_Tee.py +5 -7
  286. scitex/logging/__init__.py +18 -19
  287. scitex/logging/_config.py +4 -1
  288. scitex/logging/_context.py +6 -5
  289. scitex/logging/_formatters.py +2 -3
  290. scitex/logging/_handlers.py +19 -20
  291. scitex/logging/_levels.py +9 -17
  292. scitex/logging/_logger.py +74 -15
  293. scitex/logging/_print_capture.py +17 -17
  294. scitex/nn/_BNet.py +1 -3
  295. scitex/nn/_Filters.py +6 -2
  296. scitex/nn/_ModulationIndex.py +3 -1
  297. scitex/nn/_PAC.py +3 -2
  298. scitex/nn/_PSD.py +0 -1
  299. scitex/nn/__init__.py +16 -3
  300. scitex/path/_clean.py +10 -8
  301. scitex/path/_find.py +1 -1
  302. scitex/path/_get_spath.py +1 -2
  303. scitex/path/_mk_spath.py +1 -1
  304. scitex/path/_symlink.py +5 -10
  305. scitex/pd/__init__.py +4 -1
  306. scitex/pd/_force_df.py +24 -24
  307. scitex/pd/_get_unique.py +1 -0
  308. scitex/pd/_merge_columns.py +1 -1
  309. scitex/pd/_round.py +11 -7
  310. scitex/pd/_to_xy.py +0 -1
  311. scitex/plt/REQUESTS.md +191 -0
  312. scitex/plt/__init__.py +185 -87
  313. scitex/plt/_subplots/_AxesWrapper.py +22 -6
  314. scitex/plt/_subplots/_AxisWrapper.py +100 -39
  315. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +74 -52
  316. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +183 -73
  317. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +61 -45
  318. scitex/plt/_subplots/_AxisWrapperMixins/_TrackingMixin.py +26 -14
  319. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +80 -73
  320. scitex/plt/_subplots/_FigWrapper.py +93 -60
  321. scitex/plt/_subplots/_SubplotsWrapper.py +135 -68
  322. scitex/plt/_subplots/__init__.py +10 -0
  323. scitex/plt/_subplots/_export_as_csv.py +89 -47
  324. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +1 -0
  325. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +6 -4
  326. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +88 -38
  327. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +25 -31
  328. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +53 -23
  329. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +38 -25
  330. scitex/plt/_subplots/_export_as_csv_formatters/_format_contourf.py +17 -9
  331. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +70 -124
  332. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +12 -10
  333. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +31 -17
  334. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +33 -21
  335. scitex/plt/_subplots/_export_as_csv_formatters/_format_hexbin.py +14 -4
  336. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +43 -29
  337. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist2d.py +14 -4
  338. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +27 -11
  339. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +7 -5
  340. scitex/plt/_subplots/_export_as_csv_formatters/_format_matshow.py +9 -7
  341. scitex/plt/_subplots/_export_as_csv_formatters/_format_pie.py +15 -6
  342. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +85 -46
  343. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +52 -27
  344. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_imshow.py +1 -0
  345. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +16 -17
  346. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +7 -5
  347. scitex/plt/_subplots/_export_as_csv_formatters/_format_quiver.py +10 -8
  348. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +17 -6
  349. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +43 -26
  350. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +68 -47
  351. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +52 -64
  352. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +55 -50
  353. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +9 -11
  354. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +63 -29
  355. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +4 -4
  356. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +6 -4
  357. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +44 -40
  358. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +46 -39
  359. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +46 -39
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +75 -94
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_stem.py +12 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_step.py +12 -3
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_streamplot.py +10 -8
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_conf_mat.py +17 -15
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_ecdf.py +10 -9
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_fillv.py +35 -31
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_heatmap.py +18 -18
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_image.py +24 -18
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_joyplot.py +9 -7
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_line.py +34 -23
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_mean_ci.py +15 -13
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_mean_std.py +12 -10
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_median_iqr.py +15 -13
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_raster.py +11 -9
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_rectangle.py +84 -56
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_scatter_hist.py +35 -32
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_shaded_line.py +46 -30
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_violin.py +51 -51
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +32 -31
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +34 -31
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +44 -37
  382. scitex/plt/_subplots/_export_as_csv_formatters/verify_formatters.py +91 -74
  383. scitex/plt/_tpl.py +6 -5
  384. scitex/plt/ax/_plot/__init__.py +24 -0
  385. scitex/plt/ax/_plot/_add_fitted_line.py +12 -11
  386. scitex/plt/ax/_plot/_plot_circular_hist.py +3 -1
  387. scitex/plt/ax/_plot/_plot_statistical_shaded_line.py +25 -19
  388. scitex/plt/ax/_plot/_stx_conf_mat.py +6 -3
  389. scitex/plt/ax/_plot/_stx_ecdf.py +5 -3
  390. scitex/plt/ax/_plot/_stx_fillv.py +4 -2
  391. scitex/plt/ax/_plot/_stx_heatmap.py +7 -4
  392. scitex/plt/ax/_plot/_stx_image.py +7 -5
  393. scitex/plt/ax/_plot/_stx_joyplot.py +32 -10
  394. scitex/plt/ax/_plot/_stx_raster.py +26 -11
  395. scitex/plt/ax/_plot/_stx_rectangle.py +2 -2
  396. scitex/plt/ax/_plot/_stx_shaded_line.py +15 -11
  397. scitex/plt/ax/_plot/_stx_violin.py +3 -1
  398. scitex/plt/ax/_style/_add_marginal_ax.py +6 -4
  399. scitex/plt/ax/_style/_auto_scale_axis.py +14 -10
  400. scitex/plt/ax/_style/_extend.py +3 -1
  401. scitex/plt/ax/_style/_force_aspect.py +5 -3
  402. scitex/plt/ax/_style/_format_units.py +2 -2
  403. scitex/plt/ax/_style/_hide_spines.py +5 -1
  404. scitex/plt/ax/_style/_map_ticks.py +5 -3
  405. scitex/plt/ax/_style/_rotate_labels.py +5 -4
  406. scitex/plt/ax/_style/_rotate_labels_v01.py +73 -63
  407. scitex/plt/ax/_style/_set_log_scale.py +120 -85
  408. scitex/plt/ax/_style/_set_meta.py +99 -76
  409. scitex/plt/ax/_style/_set_supxyt.py +33 -16
  410. scitex/plt/ax/_style/_set_xyt.py +27 -18
  411. scitex/plt/ax/_style/_share_axes.py +15 -5
  412. scitex/plt/ax/_style/_show_spines.py +58 -57
  413. scitex/plt/ax/_style/_style_barplot.py +1 -1
  414. scitex/plt/ax/_style/_style_boxplot.py +25 -14
  415. scitex/plt/ax/_style/_style_errorbar.py +0 -0
  416. scitex/plt/ax/_style/_style_scatter.py +1 -1
  417. scitex/plt/ax/_style/_style_suptitles.py +3 -3
  418. scitex/plt/ax/_style/_style_violinplot.py +8 -2
  419. scitex/plt/color/__init__.py +34 -2
  420. scitex/plt/color/_add_hue_col.py +1 -0
  421. scitex/plt/color/_colors.py +0 -1
  422. scitex/plt/color/_get_colors_from_conf_matap.py +3 -1
  423. scitex/plt/color/_vizualize_colors.py +0 -1
  424. scitex/plt/docs/FIGURE_ARCHITECTURE.md +155 -97
  425. scitex/plt/gallery/README.md +75 -0
  426. scitex/plt/gallery/__init__.py +29 -0
  427. scitex/plt/gallery/_generate.py +153 -0
  428. scitex/plt/gallery/_plots.py +594 -0
  429. scitex/plt/gallery/_registry.py +153 -0
  430. scitex/plt/styles/__init__.py +9 -9
  431. scitex/plt/styles/_plot_defaults.py +62 -61
  432. scitex/plt/styles/_plot_postprocess.py +126 -77
  433. scitex/plt/styles/_style_loader.py +0 -0
  434. scitex/plt/styles/presets.py +43 -18
  435. scitex/plt/templates/research-master/scitex/vis/gallery/area/fill_between.json +110 -0
  436. scitex/plt/templates/research-master/scitex/vis/gallery/area/fill_betweenx.json +88 -0
  437. scitex/plt/templates/research-master/scitex/vis/gallery/area/stx_fill_between.json +103 -0
  438. scitex/plt/templates/research-master/scitex/vis/gallery/area/stx_fillv.json +106 -0
  439. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/bar.json +92 -0
  440. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/barh.json +92 -0
  441. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/boxplot.json +92 -0
  442. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_bar.json +84 -0
  443. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_barh.json +84 -0
  444. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_box.json +83 -0
  445. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_boxplot.json +93 -0
  446. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_violin.json +91 -0
  447. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_violinplot.json +91 -0
  448. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/violinplot.json +91 -0
  449. scitex/plt/templates/research-master/scitex/vis/gallery/contour/contour.json +97 -0
  450. scitex/plt/templates/research-master/scitex/vis/gallery/contour/contourf.json +98 -0
  451. scitex/plt/templates/research-master/scitex/vis/gallery/contour/stx_contour.json +84 -0
  452. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/hist.json +101 -0
  453. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/hist2d.json +96 -0
  454. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/stx_ecdf.json +95 -0
  455. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/stx_joyplot.json +95 -0
  456. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/stx_kde.json +93 -0
  457. scitex/plt/templates/research-master/scitex/vis/gallery/grid/imshow.json +95 -0
  458. scitex/plt/templates/research-master/scitex/vis/gallery/grid/matshow.json +95 -0
  459. scitex/plt/templates/research-master/scitex/vis/gallery/grid/stx_conf_mat.json +83 -0
  460. scitex/plt/templates/research-master/scitex/vis/gallery/grid/stx_heatmap.json +92 -0
  461. scitex/plt/templates/research-master/scitex/vis/gallery/grid/stx_image.json +121 -0
  462. scitex/plt/templates/research-master/scitex/vis/gallery/grid/stx_imshow.json +84 -0
  463. scitex/plt/templates/research-master/scitex/vis/gallery/line/plot.json +110 -0
  464. scitex/plt/templates/research-master/scitex/vis/gallery/line/step.json +92 -0
  465. scitex/plt/templates/research-master/scitex/vis/gallery/line/stx_line.json +95 -0
  466. scitex/plt/templates/research-master/scitex/vis/gallery/line/stx_shaded_line.json +96 -0
  467. scitex/plt/templates/research-master/scitex/vis/gallery/scatter/hexbin.json +95 -0
  468. scitex/plt/templates/research-master/scitex/vis/gallery/scatter/scatter.json +95 -0
  469. scitex/plt/templates/research-master/scitex/vis/gallery/scatter/stem.json +92 -0
  470. scitex/plt/templates/research-master/scitex/vis/gallery/scatter/stx_scatter.json +84 -0
  471. scitex/plt/templates/research-master/scitex/vis/gallery/special/pie.json +94 -0
  472. scitex/plt/templates/research-master/scitex/vis/gallery/special/stx_raster.json +109 -0
  473. scitex/plt/templates/research-master/scitex/vis/gallery/special/stx_rectangle.json +108 -0
  474. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/errorbar.json +93 -0
  475. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/stx_errorbar.json +84 -0
  476. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/stx_mean_ci.json +96 -0
  477. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/stx_mean_std.json +96 -0
  478. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/stx_median_iqr.json +96 -0
  479. scitex/plt/templates/research-master/scitex/vis/gallery/vector/quiver.json +99 -0
  480. scitex/plt/templates/research-master/scitex/vis/gallery/vector/streamplot.json +100 -0
  481. scitex/plt/utils/__init__.py +29 -2
  482. scitex/plt/utils/_close.py +8 -3
  483. scitex/plt/utils/_collect_figure_metadata.py +3031 -265
  484. scitex/plt/utils/_colorbar.py +15 -17
  485. scitex/plt/utils/_configure_mpl.py +22 -14
  486. scitex/plt/utils/_crop.py +60 -27
  487. scitex/plt/utils/_csv_column_naming.py +123 -72
  488. scitex/plt/utils/_dimension_viewer.py +7 -19
  489. scitex/plt/utils/_figure_from_axes_mm.py +70 -16
  490. scitex/plt/utils/_figure_mm.py +3 -2
  491. scitex/plt/utils/_get_actual_font.py +5 -4
  492. scitex/plt/utils/_histogram_utils.py +52 -48
  493. scitex/plt/utils/_is_valid_axis.py +19 -13
  494. scitex/plt/utils/_mk_colorbar.py +3 -3
  495. scitex/plt/utils/_scientific_captions.py +202 -139
  496. scitex/plt/utils/_scitex_config.py +98 -98
  497. scitex/plt/utils/_units.py +0 -0
  498. scitex/plt/utils/metadata/__init__.py +36 -0
  499. scitex/plt/utils/metadata/_artist_extraction.py +119 -0
  500. scitex/plt/utils/metadata/_axes_metadata.py +93 -0
  501. scitex/plt/utils/metadata/_collection_artists.py +292 -0
  502. scitex/plt/utils/metadata/_core.py +208 -0
  503. scitex/plt/utils/metadata/_csv_column_extraction.py +186 -0
  504. scitex/plt/utils/metadata/_csv_hash.py +115 -0
  505. scitex/plt/utils/metadata/_csv_verification.py +95 -0
  506. scitex/plt/utils/metadata/_data_linkage.py +263 -0
  507. scitex/plt/utils/metadata/_dimensions.py +239 -0
  508. scitex/plt/utils/metadata/_figure_metadata.py +58 -0
  509. scitex/plt/utils/metadata/_image_text_artists.py +168 -0
  510. scitex/plt/utils/metadata/_label_parsing.py +82 -0
  511. scitex/plt/utils/metadata/_legend_extraction.py +120 -0
  512. scitex/plt/utils/metadata/_line_artists.py +367 -0
  513. scitex/plt/utils/metadata/_line_semantic_handling.py +173 -0
  514. scitex/plt/utils/metadata/_patch_artists.py +211 -0
  515. scitex/plt/utils/metadata/_plot_content.py +26 -0
  516. scitex/plt/utils/metadata/_plot_type_detection.py +184 -0
  517. scitex/plt/utils/metadata/_precision.py +134 -0
  518. scitex/plt/utils/metadata/_precision_config.py +68 -0
  519. scitex/plt/utils/metadata/_precision_sections.py +211 -0
  520. scitex/plt/utils/metadata/_recipe_extraction.py +267 -0
  521. scitex/plt/utils/metadata/_style_parsing.py +174 -0
  522. scitex/repro/_RandomStateManager.py +33 -38
  523. scitex/repro/__init__.py +16 -7
  524. scitex/repro/_gen_ID.py +7 -9
  525. scitex/repro/_gen_timestamp.py +7 -6
  526. scitex/repro/_hash_array.py +8 -12
  527. scitex/reproduce/__init__.py +1 -1
  528. scitex/resource/_get_processor_usages.py +3 -1
  529. scitex/resource/_log_processor_usages.py +3 -1
  530. scitex/rng/__init__.py +1 -1
  531. scitex/schema/README.md +178 -0
  532. scitex/schema/__init__.py +144 -0
  533. scitex/schema/_canvas.py +444 -0
  534. scitex/schema/_stats.py +762 -0
  535. scitex/schema/_validation.py +590 -0
  536. scitex/scholar/.legacy/Scholar.py +5 -12
  537. scitex/scholar/.legacy/_Scholar.py +66 -99
  538. scitex/scholar/.legacy/_ScholarAPI.py +75 -66
  539. scitex/scholar/.legacy/_tmp/search_engine/_BaseSearchEngine.py +3 -3
  540. scitex/scholar/.legacy/_tmp/search_engine/_UnifiedSearcher.py +4 -9
  541. scitex/scholar/.legacy/_tmp/search_engine/__init__.py +14 -21
  542. scitex/scholar/.legacy/_tmp/search_engine/local/_LocalSearchEngine.py +40 -37
  543. scitex/scholar/.legacy/_tmp/search_engine/local/_VectorSearchEngine.py +31 -28
  544. scitex/scholar/.legacy/_tmp/search_engine/web/_ArxivSearchEngine.py +74 -65
  545. scitex/scholar/.legacy/_tmp/search_engine/web/_CrossRefSearchEngine.py +122 -116
  546. scitex/scholar/.legacy/_tmp/search_engine/web/_GoogleScholarSearchEngine.py +65 -59
  547. scitex/scholar/.legacy/_tmp/search_engine/web/_PubMedSearchEngine.py +121 -107
  548. scitex/scholar/.legacy/_tmp/search_engine/web/_SemanticScholarSearchEngine.py +5 -12
  549. scitex/scholar/.legacy/database/_DatabaseEntry.py +49 -45
  550. scitex/scholar/.legacy/database/_DatabaseIndex.py +131 -94
  551. scitex/scholar/.legacy/database/_LibraryManager.py +65 -63
  552. scitex/scholar/.legacy/database/_PaperDatabase.py +138 -124
  553. scitex/scholar/.legacy/database/_ScholarDatabaseIntegration.py +14 -36
  554. scitex/scholar/.legacy/database/_StorageIntegratedDB.py +192 -156
  555. scitex/scholar/.legacy/database/_ZoteroCompatibleDB.py +300 -237
  556. scitex/scholar/.legacy/database/__init__.py +2 -1
  557. scitex/scholar/.legacy/database/manage.py +92 -84
  558. scitex/scholar/.legacy/lookup/_LookupIndex.py +157 -101
  559. scitex/scholar/.legacy/lookup/__init__.py +2 -1
  560. scitex/scholar/.legacy/metadata/doi/batch/_MetadataHandlerForBatchDOIResolution.py +4 -9
  561. scitex/scholar/.legacy/metadata/doi/batch/_ProgressManagerForBatchDOIResolution.py +10 -23
  562. scitex/scholar/.legacy/metadata/doi/batch/_SourceStatsManagerForBatchDOIResolution.py +4 -9
  563. scitex/scholar/.legacy/metadata/doi/batch/__init__.py +3 -1
  564. scitex/scholar/.legacy/metadata/doi/resolvers/_BatchDOIResolver.py +10 -25
  565. scitex/scholar/.legacy/metadata/doi/resolvers/_BibTeXDOIResolver.py +19 -49
  566. scitex/scholar/.legacy/metadata/doi/resolvers/_DOIResolver.py +1 -0
  567. scitex/scholar/.legacy/metadata/doi/resolvers/_SingleDOIResolver.py +8 -20
  568. scitex/scholar/.legacy/metadata/doi/sources/.combined-SemanticScholarSource/_SemanticScholarSource.py +37 -35
  569. scitex/scholar/.legacy/metadata/doi/sources/.combined-SemanticScholarSource/_SemanticScholarSourceEnhanced.py +49 -37
  570. scitex/scholar/.legacy/metadata/doi/sources/_ArXivSource.py +11 -30
  571. scitex/scholar/.legacy/metadata/doi/sources/_BaseDOISource.py +19 -47
  572. scitex/scholar/.legacy/metadata/doi/sources/_CrossRefLocalSource.py +1 -0
  573. scitex/scholar/.legacy/metadata/doi/sources/_CrossRefSource.py +12 -33
  574. scitex/scholar/.legacy/metadata/doi/sources/_OpenAlexSource.py +8 -20
  575. scitex/scholar/.legacy/metadata/doi/sources/_PubMedSource.py +10 -27
  576. scitex/scholar/.legacy/metadata/doi/sources/_SemanticScholarSource.py +11 -29
  577. scitex/scholar/.legacy/metadata/doi/sources/_SourceManager.py +8 -21
  578. scitex/scholar/.legacy/metadata/doi/sources/_SourceResolutionStrategy.py +24 -55
  579. scitex/scholar/.legacy/metadata/doi/sources/_SourceRotationManager.py +8 -21
  580. scitex/scholar/.legacy/metadata/doi/sources/_URLDOISource.py +9 -16
  581. scitex/scholar/.legacy/metadata/doi/sources/_UnifiedSource.py +8 -22
  582. scitex/scholar/.legacy/metadata/doi/sources/__init__.py +1 -0
  583. scitex/scholar/.legacy/metadata/doi/utils/_PubMedConverter.py +4 -8
  584. scitex/scholar/.legacy/metadata/doi/utils/_RateLimitHandler.py +17 -43
  585. scitex/scholar/.legacy/metadata/doi/utils/_TextNormalizer.py +8 -18
  586. scitex/scholar/.legacy/metadata/doi/utils/_URLDOIExtractor.py +4 -8
  587. scitex/scholar/.legacy/metadata/doi/utils/__init__.py +1 -0
  588. scitex/scholar/.legacy/metadata/doi/utils/_to_complete_metadata_structure.py +1 -0
  589. scitex/scholar/.legacy/metadata/enrichment/_LibraryEnricher.py +2 -3
  590. scitex/scholar/.legacy/metadata/enrichment/enrichers/_ImpactFactorEnricher.py +6 -12
  591. scitex/scholar/.legacy/metadata/enrichment/enrichers/_SmartEnricher.py +5 -10
  592. scitex/scholar/.legacy/metadata/enrichment/sources/_UnifiedMetadataSource.py +4 -5
  593. scitex/scholar/.legacy/metadata/query_to_full_meta_json.py +8 -12
  594. scitex/scholar/.legacy/metadata/urls/_URLMetadataHandler.py +3 -3
  595. scitex/scholar/.legacy/metadata/urls/_ZoteroTranslatorRunner.py +15 -21
  596. scitex/scholar/.legacy/metadata/urls/__init__.py +3 -3
  597. scitex/scholar/.legacy/metadata/urls/_finder.py +4 -6
  598. scitex/scholar/.legacy/metadata/urls/_handler.py +7 -15
  599. scitex/scholar/.legacy/metadata/urls/_resolver.py +6 -12
  600. scitex/scholar/.legacy/search/_Embedder.py +74 -69
  601. scitex/scholar/.legacy/search/_SemanticSearch.py +91 -90
  602. scitex/scholar/.legacy/search/_SemanticSearchEngine.py +104 -109
  603. scitex/scholar/.legacy/search/_UnifiedSearcher.py +530 -471
  604. scitex/scholar/.legacy/search/_VectorDatabase.py +111 -92
  605. scitex/scholar/.legacy/search/__init__.py +1 -0
  606. scitex/scholar/.legacy/storage/_EnhancedStorageManager.py +182 -154
  607. scitex/scholar/.legacy/storage/__init__.py +2 -1
  608. scitex/scholar/__init__.py +0 -2
  609. scitex/scholar/__main__.py +1 -3
  610. scitex/scholar/auth/ScholarAuthManager.py +13 -36
  611. scitex/scholar/auth/core/AuthenticationGateway.py +15 -29
  612. scitex/scholar/auth/core/BrowserAuthenticator.py +22 -57
  613. scitex/scholar/auth/core/StrategyResolver.py +10 -27
  614. scitex/scholar/auth/core/__init__.py +5 -1
  615. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +11 -21
  616. scitex/scholar/auth/gateway/_OpenURLResolver.py +10 -18
  617. scitex/scholar/auth/gateway/_resolve_functions.py +3 -3
  618. scitex/scholar/auth/providers/BaseAuthenticator.py +1 -0
  619. scitex/scholar/auth/providers/EZProxyAuthenticator.py +7 -14
  620. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +29 -57
  621. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +87 -73
  622. scitex/scholar/auth/session/AuthCacheManager.py +12 -22
  623. scitex/scholar/auth/session/SessionManager.py +4 -6
  624. scitex/scholar/auth/sso/BaseSSOAutomator.py +13 -19
  625. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +16 -45
  626. scitex/scholar/auth/sso/SSOAutomator.py +8 -15
  627. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +13 -23
  628. scitex/scholar/browser/ScholarBrowserManager.py +31 -56
  629. scitex/scholar/browser/__init__.py +1 -0
  630. scitex/scholar/browser/utils/click_and_wait.py +3 -4
  631. scitex/scholar/browser/utils/close_unwanted_pages.py +4 -7
  632. scitex/scholar/browser/utils/wait_redirects.py +15 -40
  633. scitex/scholar/citation_graph/__init__.py +0 -0
  634. scitex/scholar/citation_graph/builder.py +3 -7
  635. scitex/scholar/citation_graph/database.py +4 -11
  636. scitex/scholar/citation_graph/example.py +5 -10
  637. scitex/scholar/citation_graph/models.py +0 -0
  638. scitex/scholar/cli/_url_utils.py +1 -1
  639. scitex/scholar/cli/chrome.py +5 -3
  640. scitex/scholar/cli/download_pdf.py +13 -14
  641. scitex/scholar/cli/handlers/bibtex_handler.py +4 -12
  642. scitex/scholar/cli/handlers/doi_handler.py +1 -3
  643. scitex/scholar/cli/handlers/project_handler.py +6 -20
  644. scitex/scholar/cli/open_browser.py +41 -39
  645. scitex/scholar/cli/open_browser_auto.py +31 -39
  646. scitex/scholar/cli/open_browser_monitored.py +27 -24
  647. scitex/scholar/config/ScholarConfig.py +5 -8
  648. scitex/scholar/config/__init__.py +1 -0
  649. scitex/scholar/config/core/_CascadeConfig.py +3 -3
  650. scitex/scholar/config/core/_PathManager.py +16 -28
  651. scitex/scholar/core/Paper.py +79 -78
  652. scitex/scholar/core/Papers.py +16 -27
  653. scitex/scholar/core/Scholar.py +98 -229
  654. scitex/scholar/core/journal_normalizer.py +52 -49
  655. scitex/scholar/core/oa_cache.py +27 -23
  656. scitex/scholar/core/open_access.py +17 -8
  657. scitex/scholar/docs/template.py +4 -3
  658. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/clf_svm.py +0 -0
  659. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/download.py +0 -0
  660. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/plot_conf_mat.py +0 -0
  661. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/plot_digits.py +0 -0
  662. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/plot_umap_space.py +0 -0
  663. scitex/scholar/examples/00_config.py +10 -9
  664. scitex/scholar/examples/01_auth.py +3 -0
  665. scitex/scholar/examples/02_browser.py +14 -10
  666. scitex/scholar/examples/03_01-engine.py +3 -0
  667. scitex/scholar/examples/03_02-engine-for-bibtex.py +4 -3
  668. scitex/scholar/examples/04_01-url.py +9 -9
  669. scitex/scholar/examples/04_02-url-for-bibtex.py +7 -3
  670. scitex/scholar/examples/04_02-url-for-dois.py +87 -97
  671. scitex/scholar/examples/05_download_pdf.py +10 -4
  672. scitex/scholar/examples/06_find_and_download.py +6 -6
  673. scitex/scholar/examples/06_parse_bibtex.py +17 -17
  674. scitex/scholar/examples/07_storage_integration.py +6 -9
  675. scitex/scholar/examples/99_fullpipeline-for-bibtex.py +14 -15
  676. scitex/scholar/examples/99_fullpipeline-for-one-entry.py +31 -23
  677. scitex/scholar/examples/99_maintenance.py +3 -0
  678. scitex/scholar/examples/dev.py +2 -3
  679. scitex/scholar/examples/zotero_integration.py +11 -18
  680. scitex/scholar/impact_factor/ImpactFactorEngine.py +7 -9
  681. scitex/scholar/impact_factor/estimation/__init__.py +4 -4
  682. scitex/scholar/impact_factor/estimation/core/__init__.py +3 -7
  683. scitex/scholar/impact_factor/estimation/core/cache_manager.py +223 -211
  684. scitex/scholar/impact_factor/estimation/core/calculator.py +165 -131
  685. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +217 -172
  686. scitex/scholar/impact_factor/jcr/ImpactFactorJCREngine.py +6 -14
  687. scitex/scholar/impact_factor/jcr/build_database.py +4 -3
  688. scitex/scholar/integration/base.py +9 -17
  689. scitex/scholar/integration/mendeley/exporter.py +2 -4
  690. scitex/scholar/integration/mendeley/importer.py +3 -3
  691. scitex/scholar/integration/mendeley/linker.py +3 -3
  692. scitex/scholar/integration/mendeley/mapper.py +9 -6
  693. scitex/scholar/integration/zotero/__main__.py +26 -43
  694. scitex/scholar/integration/zotero/exporter.py +15 -11
  695. scitex/scholar/integration/zotero/importer.py +12 -10
  696. scitex/scholar/integration/zotero/linker.py +8 -12
  697. scitex/scholar/integration/zotero/mapper.py +17 -12
  698. scitex/scholar/metadata_engines/.combined-SemanticScholarSource/_SemanticScholarSource.py +37 -35
  699. scitex/scholar/metadata_engines/.combined-SemanticScholarSource/_SemanticScholarSourceEnhanced.py +47 -35
  700. scitex/scholar/metadata_engines/ScholarEngine.py +21 -43
  701. scitex/scholar/metadata_engines/__init__.py +1 -0
  702. scitex/scholar/metadata_engines/individual/ArXivEngine.py +15 -37
  703. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +15 -42
  704. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +24 -45
  705. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +11 -21
  706. scitex/scholar/metadata_engines/individual/PubMedEngine.py +10 -27
  707. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +28 -35
  708. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +11 -22
  709. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +20 -49
  710. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +4 -8
  711. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +5 -10
  712. scitex/scholar/metadata_engines/utils/__init__.py +2 -0
  713. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +3 -0
  714. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +2 -3
  715. scitex/scholar/pdf_download/ScholarPDFDownloader.py +25 -37
  716. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +11 -19
  717. scitex/scholar/pdf_download/strategies/direct_download.py +5 -9
  718. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +3 -3
  719. scitex/scholar/pdf_download/strategies/manual_download_utils.py +6 -13
  720. scitex/scholar/pdf_download/strategies/open_access_download.py +49 -31
  721. scitex/scholar/pdf_download/strategies/response_body.py +8 -19
  722. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +9 -18
  723. scitex/scholar/pipelines/ScholarPipelineMetadataParallel.py +25 -26
  724. scitex/scholar/pipelines/ScholarPipelineMetadataSingle.py +62 -23
  725. scitex/scholar/pipelines/ScholarPipelineParallel.py +13 -30
  726. scitex/scholar/pipelines/ScholarPipelineSearchParallel.py +299 -220
  727. scitex/scholar/pipelines/ScholarPipelineSearchSingle.py +202 -165
  728. scitex/scholar/pipelines/ScholarPipelineSingle.py +25 -51
  729. scitex/scholar/pipelines/SearchQueryParser.py +55 -55
  730. scitex/scholar/search_engines/ScholarSearchEngine.py +31 -27
  731. scitex/scholar/search_engines/_BaseSearchEngine.py +20 -23
  732. scitex/scholar/search_engines/individual/ArXivSearchEngine.py +53 -35
  733. scitex/scholar/search_engines/individual/CrossRefSearchEngine.py +47 -40
  734. scitex/scholar/search_engines/individual/OpenAlexSearchEngine.py +55 -50
  735. scitex/scholar/search_engines/individual/PubMedSearchEngine.py +8 -10
  736. scitex/scholar/search_engines/individual/SemanticScholarSearchEngine.py +55 -49
  737. scitex/scholar/storage/BibTeXHandler.py +150 -95
  738. scitex/scholar/storage/PaperIO.py +3 -6
  739. scitex/scholar/storage/ScholarLibrary.py +70 -49
  740. scitex/scholar/storage/_DeduplicationManager.py +52 -25
  741. scitex/scholar/storage/_LibraryCacheManager.py +19 -46
  742. scitex/scholar/storage/_LibraryManager.py +65 -175
  743. scitex/scholar/url_finder/ScholarURLFinder.py +9 -25
  744. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +1 -1
  745. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +6 -10
  746. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +4 -6
  747. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +8 -15
  748. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +3 -3
  749. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +3 -3
  750. scitex/scholar/url_finder/translators/core/patterns.py +6 -4
  751. scitex/scholar/url_finder/translators/core/registry.py +6 -9
  752. scitex/scholar/url_finder/translators/individual/BOFiP_Impots.py +60 -52
  753. scitex/scholar/url_finder/translators/individual/Baidu_Scholar.py +54 -62
  754. scitex/scholar/url_finder/translators/individual/Bangkok_Post.py +38 -44
  755. scitex/scholar/url_finder/translators/individual/Baruch_Foundation.py +43 -47
  756. scitex/scholar/url_finder/translators/individual/Beobachter.py +46 -50
  757. scitex/scholar/url_finder/translators/individual/Bezneng_Gajit.py +37 -41
  758. scitex/scholar/url_finder/translators/individual/BibLaTeX.py +59 -52
  759. scitex/scholar/url_finder/translators/individual/BibTeX.py +83 -79
  760. scitex/scholar/url_finder/translators/individual/Biblio_com.py +48 -51
  761. scitex/scholar/url_finder/translators/individual/Bibliontology_RDF.py +58 -56
  762. scitex/scholar/url_finder/translators/individual/Camara_Brasileira_do_Livro_ISBN.py +102 -99
  763. scitex/scholar/url_finder/translators/individual/CanLII.py +49 -43
  764. scitex/scholar/url_finder/translators/individual/Canada_com.py +36 -40
  765. scitex/scholar/url_finder/translators/individual/Canadian_Letters_and_Images.py +43 -43
  766. scitex/scholar/url_finder/translators/individual/Canadiana_ca.py +77 -66
  767. scitex/scholar/url_finder/translators/individual/Cascadilla_Proceedings_Project.py +68 -62
  768. scitex/scholar/url_finder/translators/individual/Central_and_Eastern_European_Online_Library_Journals.py +60 -60
  769. scitex/scholar/url_finder/translators/individual/Champlain_Society_Collection.py +63 -61
  770. scitex/scholar/url_finder/translators/individual/Chicago_Journal_of_Theoretical_Computer_Science.py +74 -58
  771. scitex/scholar/url_finder/translators/individual/Christian_Science_Monitor.py +32 -38
  772. scitex/scholar/url_finder/translators/individual/Columbia_University_Press.py +51 -47
  773. scitex/scholar/url_finder/translators/individual/Common_Place.py +66 -57
  774. scitex/scholar/url_finder/translators/individual/Cornell_LII.py +66 -62
  775. scitex/scholar/url_finder/translators/individual/Cornell_University_Press.py +38 -45
  776. scitex/scholar/url_finder/translators/individual/CourtListener.py +52 -56
  777. scitex/scholar/url_finder/translators/individual/DAI_Zenon.py +53 -54
  778. scitex/scholar/url_finder/translators/individual/access_medicine.py +27 -33
  779. scitex/scholar/url_finder/translators/individual/acm.py +1 -1
  780. scitex/scholar/url_finder/translators/individual/acm_digital_library.py +93 -63
  781. scitex/scholar/url_finder/translators/individual/airiti.py +3 -1
  782. scitex/scholar/url_finder/translators/individual/aosic.py +3 -1
  783. scitex/scholar/url_finder/translators/individual/archive_ouverte_aosic.py +3 -1
  784. scitex/scholar/url_finder/translators/individual/archive_ouverte_en_sciences_de_l_information_et_de_la_communication___aosic_.py +6 -2
  785. scitex/scholar/url_finder/translators/individual/artforum.py +35 -27
  786. scitex/scholar/url_finder/translators/individual/arxiv.py +1 -1
  787. scitex/scholar/url_finder/translators/individual/arxiv_org.py +8 -4
  788. scitex/scholar/url_finder/translators/individual/atlanta_journal_constitution.py +22 -18
  789. scitex/scholar/url_finder/translators/individual/atypon_journals.py +19 -11
  790. scitex/scholar/url_finder/translators/individual/austlii_and_nzlii.py +48 -44
  791. scitex/scholar/url_finder/translators/individual/australian_dictionary_of_biography.py +21 -17
  792. scitex/scholar/url_finder/translators/individual/bailii.py +22 -19
  793. scitex/scholar/url_finder/translators/individual/bbc.py +46 -42
  794. scitex/scholar/url_finder/translators/individual/bbc_genome.py +37 -25
  795. scitex/scholar/url_finder/translators/individual/biblioteca_nacional_de_maestros.py +24 -20
  796. scitex/scholar/url_finder/translators/individual/bibliotheque_archives_nationale_quebec_pistard.py +42 -43
  797. scitex/scholar/url_finder/translators/individual/bibliotheque_archives_nationales_quebec.py +87 -81
  798. scitex/scholar/url_finder/translators/individual/bibliotheque_nationale_france.py +39 -37
  799. scitex/scholar/url_finder/translators/individual/bibsys.py +32 -28
  800. scitex/scholar/url_finder/translators/individual/bioconductor.py +58 -52
  801. scitex/scholar/url_finder/translators/individual/biomed_central.py +23 -15
  802. scitex/scholar/url_finder/translators/individual/biorxiv.py +26 -13
  803. scitex/scholar/url_finder/translators/individual/blogger.py +39 -43
  804. scitex/scholar/url_finder/translators/individual/bloomberg.py +48 -52
  805. scitex/scholar/url_finder/translators/individual/bloomsbury_food_library.py +37 -37
  806. scitex/scholar/url_finder/translators/individual/bluesky.py +30 -28
  807. scitex/scholar/url_finder/translators/individual/bnf_isbn.py +1 -1
  808. scitex/scholar/url_finder/translators/individual/bocc.py +66 -60
  809. scitex/scholar/url_finder/translators/individual/boe.py +52 -52
  810. scitex/scholar/url_finder/translators/individual/brill.py +3 -1
  811. scitex/scholar/url_finder/translators/individual/business_standard.py +36 -38
  812. scitex/scholar/url_finder/translators/individual/cabi_cab_abstracts.py +39 -41
  813. scitex/scholar/url_finder/translators/individual/cambridge.py +3 -1
  814. scitex/scholar/url_finder/translators/individual/cambridge_core.py +30 -24
  815. scitex/scholar/url_finder/translators/individual/caod.py +50 -46
  816. scitex/scholar/url_finder/translators/individual/cbc.py +91 -67
  817. scitex/scholar/url_finder/translators/individual/ccfr_bnf.py +49 -53
  818. scitex/scholar/url_finder/translators/individual/cia_world_factbook.py +43 -33
  819. scitex/scholar/url_finder/translators/individual/crossref_rest.py +208 -174
  820. scitex/scholar/url_finder/translators/individual/current_affairs.py +29 -35
  821. scitex/scholar/url_finder/translators/individual/dabi.py +70 -66
  822. scitex/scholar/url_finder/translators/individual/dagens_nyheter.py +3 -1
  823. scitex/scholar/url_finder/translators/individual/dagstuhl.py +10 -15
  824. scitex/scholar/url_finder/translators/individual/dar_almandumah.py +13 -9
  825. scitex/scholar/url_finder/translators/individual/dart_europe.py +19 -22
  826. scitex/scholar/url_finder/translators/individual/data_gov.py +2 -2
  827. scitex/scholar/url_finder/translators/individual/databrary.py +27 -28
  828. scitex/scholar/url_finder/translators/individual/datacite_json.py +152 -137
  829. scitex/scholar/url_finder/translators/individual/dataverse.py +68 -64
  830. scitex/scholar/url_finder/translators/individual/daum_news.py +38 -38
  831. scitex/scholar/url_finder/translators/individual/dblp.py +4 -8
  832. scitex/scholar/url_finder/translators/individual/dblp_computer_science_bibliography.py +8 -3
  833. scitex/scholar/url_finder/translators/individual/dbpia.py +5 -3
  834. scitex/scholar/url_finder/translators/individual/defense_technical_information_center.py +30 -28
  835. scitex/scholar/url_finder/translators/individual/delpher.py +102 -79
  836. scitex/scholar/url_finder/translators/individual/demographic_research.py +35 -31
  837. scitex/scholar/url_finder/translators/individual/denik_cz.py +58 -54
  838. scitex/scholar/url_finder/translators/individual/depatisnet.py +7 -10
  839. scitex/scholar/url_finder/translators/individual/der_freitag.py +81 -66
  840. scitex/scholar/url_finder/translators/individual/der_spiegel.py +56 -54
  841. scitex/scholar/url_finder/translators/individual/digibib_net.py +3 -1
  842. scitex/scholar/url_finder/translators/individual/digizeitschriften.py +3 -1
  843. scitex/scholar/url_finder/translators/individual/dpla.py +13 -14
  844. scitex/scholar/url_finder/translators/individual/dspace.py +2 -2
  845. scitex/scholar/url_finder/translators/individual/ebrary.py +3 -1
  846. scitex/scholar/url_finder/translators/individual/ebscohost.py +3 -1
  847. scitex/scholar/url_finder/translators/individual/electronic_colloquium_on_computational_complexity.py +3 -1
  848. scitex/scholar/url_finder/translators/individual/elife.py +3 -1
  849. scitex/scholar/url_finder/translators/individual/elsevier_health_journals.py +3 -1
  850. scitex/scholar/url_finder/translators/individual/emerald.py +3 -1
  851. scitex/scholar/url_finder/translators/individual/emerald_insight.py +3 -1
  852. scitex/scholar/url_finder/translators/individual/epicurious.py +3 -1
  853. scitex/scholar/url_finder/translators/individual/eurogamerusgamer.py +3 -1
  854. scitex/scholar/url_finder/translators/individual/fachportal_padagogik.py +3 -1
  855. scitex/scholar/url_finder/translators/individual/frontiers.py +1 -1
  856. scitex/scholar/url_finder/translators/individual/gale_databases.py +3 -1
  857. scitex/scholar/url_finder/translators/individual/gms_german_medical_science.py +6 -2
  858. scitex/scholar/url_finder/translators/individual/ieee_computer_society.py +6 -2
  859. scitex/scholar/url_finder/translators/individual/ieee_xplore.py +41 -35
  860. scitex/scholar/url_finder/translators/individual/inter_research_science_center.py +6 -2
  861. scitex/scholar/url_finder/translators/individual/jisc_historical_texts.py +3 -1
  862. scitex/scholar/url_finder/translators/individual/jstor.py +14 -12
  863. scitex/scholar/url_finder/translators/individual/korean_national_library.py +3 -1
  864. scitex/scholar/url_finder/translators/individual/la_times.py +3 -1
  865. scitex/scholar/url_finder/translators/individual/landesbibliographie_baden_wurttemberg.py +3 -1
  866. scitex/scholar/url_finder/translators/individual/legislative_insight.py +3 -1
  867. scitex/scholar/url_finder/translators/individual/libraries_tasmania.py +3 -1
  868. scitex/scholar/url_finder/translators/individual/library_catalog__koha_.py +3 -1
  869. scitex/scholar/url_finder/translators/individual/lingbuzz.py +2 -2
  870. scitex/scholar/url_finder/translators/individual/max_planck_institute_for_the_history_of_science_virtual_laboratory_library.py +3 -1
  871. scitex/scholar/url_finder/translators/individual/mdpi.py +12 -6
  872. scitex/scholar/url_finder/translators/individual/microbiology_society_journals.py +3 -1
  873. scitex/scholar/url_finder/translators/individual/midas_journals.py +3 -1
  874. scitex/scholar/url_finder/translators/individual/nagoya_university_opac.py +3 -1
  875. scitex/scholar/url_finder/translators/individual/nature_publishing_group.py +32 -19
  876. scitex/scholar/url_finder/translators/individual/ntsb_accident_reports.py +3 -1
  877. scitex/scholar/url_finder/translators/individual/openedition_journals.py +8 -4
  878. scitex/scholar/url_finder/translators/individual/orcid.py +16 -15
  879. scitex/scholar/url_finder/translators/individual/oxford.py +25 -19
  880. scitex/scholar/url_finder/translators/individual/oxford_dictionaries_premium.py +3 -1
  881. scitex/scholar/url_finder/translators/individual/ozon_ru.py +3 -1
  882. scitex/scholar/url_finder/translators/individual/plos.py +9 -12
  883. scitex/scholar/url_finder/translators/individual/polygon.py +3 -1
  884. scitex/scholar/url_finder/translators/individual/primo.py +3 -1
  885. scitex/scholar/url_finder/translators/individual/project_muse.py +3 -1
  886. scitex/scholar/url_finder/translators/individual/pubfactory_journals.py +3 -1
  887. scitex/scholar/url_finder/translators/individual/pubmed.py +71 -65
  888. scitex/scholar/url_finder/translators/individual/pubmed_central.py +8 -6
  889. scitex/scholar/url_finder/translators/individual/rechtspraak_nl.py +3 -1
  890. scitex/scholar/url_finder/translators/individual/sage_journals.py +25 -17
  891. scitex/scholar/url_finder/translators/individual/sciencedirect.py +36 -17
  892. scitex/scholar/url_finder/translators/individual/semantics_visual_library.py +3 -1
  893. scitex/scholar/url_finder/translators/individual/silverchair.py +70 -52
  894. scitex/scholar/url_finder/translators/individual/sora.py +3 -1
  895. scitex/scholar/url_finder/translators/individual/springer.py +15 -11
  896. scitex/scholar/url_finder/translators/individual/ssrn.py +3 -3
  897. scitex/scholar/url_finder/translators/individual/stanford_encyclopedia_of_philosophy.py +3 -1
  898. scitex/scholar/url_finder/translators/individual/superlib.py +3 -1
  899. scitex/scholar/url_finder/translators/individual/treesearch.py +3 -1
  900. scitex/scholar/url_finder/translators/individual/university_of_chicago_press_books.py +3 -1
  901. scitex/scholar/url_finder/translators/individual/vlex.py +3 -1
  902. scitex/scholar/url_finder/translators/individual/web_of_science.py +3 -1
  903. scitex/scholar/url_finder/translators/individual/web_of_science_nextgen.py +3 -1
  904. scitex/scholar/url_finder/translators/individual/wiley.py +31 -25
  905. scitex/scholar/url_finder/translators/individual/wilson_center_digital_archive.py +3 -1
  906. scitex/scholar/utils/bibtex/_parse_bibtex.py +3 -3
  907. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +5 -9
  908. scitex/scholar/utils/text/_TextNormalizer.py +249 -176
  909. scitex/scholar/utils/validation/DOIValidator.py +31 -28
  910. scitex/scholar/utils/validation/__init__.py +0 -0
  911. scitex/scholar/utils/validation/validate_library_dois.py +61 -57
  912. scitex/scholar/zotero/__init__.py +1 -1
  913. scitex/security/cli.py +7 -20
  914. scitex/security/github.py +45 -32
  915. scitex/session/__init__.py +8 -9
  916. scitex/session/_decorator.py +49 -42
  917. scitex/session/_lifecycle.py +39 -39
  918. scitex/session/_manager.py +24 -20
  919. scitex/sh/__init__.py +4 -3
  920. scitex/sh/_execute.py +10 -7
  921. scitex/sh/_security.py +3 -3
  922. scitex/sh/_types.py +2 -3
  923. scitex/stats/__init__.py +57 -6
  924. scitex/stats/_schema.py +42 -569
  925. scitex/stats/auto/__init__.py +188 -0
  926. scitex/stats/auto/_context.py +331 -0
  927. scitex/stats/auto/_formatting.py +679 -0
  928. scitex/stats/auto/_rules.py +901 -0
  929. scitex/stats/auto/_selector.py +554 -0
  930. scitex/stats/auto/_styles.py +721 -0
  931. scitex/stats/correct/__init__.py +4 -4
  932. scitex/stats/correct/_correct_bonferroni.py +43 -34
  933. scitex/stats/correct/_correct_fdr.py +14 -40
  934. scitex/stats/correct/_correct_fdr_.py +39 -46
  935. scitex/stats/correct/_correct_holm.py +14 -32
  936. scitex/stats/correct/_correct_sidak.py +36 -21
  937. scitex/stats/descriptive/_circular.py +20 -21
  938. scitex/stats/descriptive/_describe.py +19 -5
  939. scitex/stats/descriptive/_nan.py +5 -7
  940. scitex/stats/descriptive/_real.py +4 -3
  941. scitex/stats/effect_sizes/__init__.py +10 -11
  942. scitex/stats/effect_sizes/_cliffs_delta.py +35 -32
  943. scitex/stats/effect_sizes/_cohens_d.py +30 -31
  944. scitex/stats/effect_sizes/_epsilon_squared.py +19 -22
  945. scitex/stats/effect_sizes/_eta_squared.py +23 -27
  946. scitex/stats/effect_sizes/_prob_superiority.py +18 -21
  947. scitex/stats/posthoc/__init__.py +3 -3
  948. scitex/stats/posthoc/_dunnett.py +75 -55
  949. scitex/stats/posthoc/_games_howell.py +61 -43
  950. scitex/stats/posthoc/_tukey_hsd.py +42 -34
  951. scitex/stats/power/__init__.py +2 -2
  952. scitex/stats/power/_power.py +56 -56
  953. scitex/stats/tests/__init__.py +1 -1
  954. scitex/stats/tests/correlation/__init__.py +1 -1
  955. scitex/stats/tests/correlation/_test_pearson.py +28 -38
  956. scitex/stats/utils/__init__.py +14 -17
  957. scitex/stats/utils/_effect_size.py +85 -78
  958. scitex/stats/utils/_formatters.py +49 -43
  959. scitex/stats/utils/_normalizers.py +7 -14
  960. scitex/stats/utils/_power.py +56 -56
  961. scitex/str/__init__.py +1 -0
  962. scitex/str/_clean_path.py +3 -3
  963. scitex/str/_factor_out_digits.py +86 -58
  964. scitex/str/_format_plot_text.py +180 -111
  965. scitex/str/_latex.py +19 -19
  966. scitex/str/_latex_fallback.py +9 -10
  967. scitex/str/_parse.py +3 -6
  968. scitex/str/_print_debug.py +13 -13
  969. scitex/str/_printc.py +2 -0
  970. scitex/str/_search.py +3 -3
  971. scitex/template/.legacy/_clone_project.py +9 -13
  972. scitex/template/__init__.py +10 -2
  973. scitex/template/_clone_project.py +7 -2
  974. scitex/template/_copy.py +1 -0
  975. scitex/template/_customize.py +3 -6
  976. scitex/template/_git_strategy.py +2 -3
  977. scitex/template/_rename.py +1 -0
  978. scitex/template/clone_pip_project.py +6 -7
  979. scitex/template/clone_research.py +7 -10
  980. scitex/template/clone_singularity.py +6 -7
  981. scitex/template/clone_writer_directory.py +6 -7
  982. scitex/tex/_preview.py +26 -11
  983. scitex/tex/_to_vec.py +10 -7
  984. scitex/torch/__init__.py +11 -1
  985. scitex/types/_ArrayLike.py +2 -0
  986. scitex/types/_is_listed_X.py +3 -3
  987. scitex/units.py +110 -77
  988. scitex/utils/_compress_hdf5.py +3 -3
  989. scitex/utils/_email.py +8 -4
  990. scitex/utils/_notify.py +14 -8
  991. scitex/utils/_search.py +6 -6
  992. scitex/utils/_verify_scitex_format.py +17 -42
  993. scitex/utils/_verify_scitex_format_v01.py +12 -34
  994. scitex/utils/template.py +4 -3
  995. scitex/vis/__init__.py +0 -0
  996. scitex/vis/backend/__init__.py +3 -3
  997. scitex/vis/backend/{export.py → _export.py} +1 -1
  998. scitex/vis/backend/{parser.py → _parser.py} +1 -3
  999. scitex/vis/backend/{render.py → _render.py} +1 -1
  1000. scitex/vis/canvas.py +15 -3
  1001. scitex/vis/editor/__init__.py +0 -0
  1002. scitex/vis/editor/_dearpygui_editor.py +450 -304
  1003. scitex/vis/editor/_defaults.py +114 -123
  1004. scitex/vis/editor/_edit.py +38 -26
  1005. scitex/vis/editor/_flask_editor.py +8 -8
  1006. scitex/vis/editor/_mpl_editor.py +63 -48
  1007. scitex/vis/editor/_qt_editor.py +210 -159
  1008. scitex/vis/editor/_tkinter_editor.py +146 -89
  1009. scitex/vis/editor/flask_editor/__init__.py +10 -10
  1010. scitex/vis/editor/flask_editor/_bbox.py +529 -0
  1011. scitex/vis/editor/flask_editor/{core.py → _core.py} +45 -29
  1012. scitex/vis/editor/flask_editor/_plotter.py +567 -0
  1013. scitex/vis/editor/flask_editor/_renderer.py +393 -0
  1014. scitex/vis/editor/flask_editor/{utils.py → _utils.py} +13 -14
  1015. scitex/vis/editor/flask_editor/templates/__init__.py +5 -5
  1016. scitex/vis/editor/flask_editor/templates/{html.py → _html.py} +234 -16
  1017. scitex/vis/editor/flask_editor/templates/_scripts.py +1261 -0
  1018. scitex/vis/editor/flask_editor/templates/{styles.py → _styles.py} +192 -2
  1019. scitex/vis/io/__init__.py +5 -5
  1020. scitex/vis/io/{canvas.py → _canvas.py} +8 -4
  1021. scitex/vis/io/{data.py → _data.py} +13 -9
  1022. scitex/vis/io/{directory.py → _directory.py} +7 -4
  1023. scitex/vis/io/{export.py → _export.py} +15 -12
  1024. scitex/vis/io/{load.py → _load.py} +1 -1
  1025. scitex/vis/io/{panel.py → _panel.py} +21 -13
  1026. scitex/vis/io/{save.py → _save.py} +0 -0
  1027. scitex/vis/model/__init__.py +7 -7
  1028. scitex/vis/model/{annotations.py → _annotations.py} +2 -4
  1029. scitex/vis/model/{axes.py → _axes.py} +1 -1
  1030. scitex/vis/model/{figure.py → _figure.py} +0 -0
  1031. scitex/vis/model/{guides.py → _guides.py} +1 -1
  1032. scitex/vis/model/{plot.py → _plot.py} +2 -4
  1033. scitex/vis/model/{plot_types.py → _plot_types.py} +0 -0
  1034. scitex/vis/model/{styles.py → _styles.py} +0 -0
  1035. scitex/vis/utils/__init__.py +2 -2
  1036. scitex/vis/utils/{defaults.py → _defaults.py} +1 -2
  1037. scitex/vis/utils/{validate.py → _validate.py} +3 -9
  1038. scitex/web/__init__.py +7 -1
  1039. scitex/web/_scraping.py +54 -38
  1040. scitex/web/_search_pubmed.py +30 -14
  1041. scitex/writer/.legacy/Writer_v01-refactored.py +4 -4
  1042. scitex/writer/.legacy/_compile.py +18 -28
  1043. scitex/writer/Writer.py +8 -21
  1044. scitex/writer/__init__.py +11 -11
  1045. scitex/writer/_clone_writer_project.py +2 -6
  1046. scitex/writer/_compile/__init__.py +1 -0
  1047. scitex/writer/_compile/_parser.py +1 -0
  1048. scitex/writer/_compile/_runner.py +35 -38
  1049. scitex/writer/_compile/_validator.py +1 -0
  1050. scitex/writer/_compile/manuscript.py +1 -0
  1051. scitex/writer/_compile/revision.py +1 -0
  1052. scitex/writer/_compile/supplementary.py +1 -0
  1053. scitex/writer/_compile_async.py +5 -12
  1054. scitex/writer/_project/__init__.py +1 -0
  1055. scitex/writer/_project/_create.py +10 -25
  1056. scitex/writer/_project/_trees.py +4 -9
  1057. scitex/writer/_project/_validate.py +2 -3
  1058. scitex/writer/_validate_tree_structures.py +7 -18
  1059. scitex/writer/dataclasses/__init__.py +8 -10
  1060. scitex/writer/dataclasses/config/_CONSTANTS.py +2 -3
  1061. scitex/writer/dataclasses/config/_WriterConfig.py +4 -9
  1062. scitex/writer/dataclasses/contents/_ManuscriptContents.py +14 -25
  1063. scitex/writer/dataclasses/contents/_RevisionContents.py +21 -16
  1064. scitex/writer/dataclasses/contents/_SupplementaryContents.py +21 -24
  1065. scitex/writer/dataclasses/core/_Document.py +2 -3
  1066. scitex/writer/dataclasses/core/_DocumentSection.py +8 -23
  1067. scitex/writer/dataclasses/results/_CompilationResult.py +2 -3
  1068. scitex/writer/dataclasses/results/_LaTeXIssue.py +3 -6
  1069. scitex/writer/dataclasses/results/_SaveSectionsResponse.py +20 -9
  1070. scitex/writer/dataclasses/results/_SectionReadResponse.py +24 -10
  1071. scitex/writer/dataclasses/tree/_ConfigTree.py +7 -4
  1072. scitex/writer/dataclasses/tree/_ManuscriptTree.py +10 -13
  1073. scitex/writer/dataclasses/tree/_RevisionTree.py +16 -17
  1074. scitex/writer/dataclasses/tree/_ScriptsTree.py +10 -5
  1075. scitex/writer/dataclasses/tree/_SharedTree.py +10 -13
  1076. scitex/writer/dataclasses/tree/_SupplementaryTree.py +15 -14
  1077. scitex/writer/utils/.legacy_git_retry.py +3 -8
  1078. scitex/writer/utils/_parse_latex_logs.py +2 -3
  1079. scitex/writer/utils/_parse_script_args.py +20 -23
  1080. scitex/writer/utils/_watch.py +5 -5
  1081. {scitex-2.5.0.dist-info → scitex-2.7.0.dist-info}/METADATA +4 -10
  1082. {scitex-2.5.0.dist-info → scitex-2.7.0.dist-info}/RECORD +1071 -975
  1083. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +0 -583
  1084. scitex/plt/_subplots/_export_as_csv_formatters.py +0 -112
  1085. scitex/vis/editor/flask_editor/bbox.py +0 -216
  1086. scitex/vis/editor/flask_editor/plotter.py +0 -130
  1087. scitex/vis/editor/flask_editor/renderer.py +0 -184
  1088. scitex/vis/editor/flask_editor/templates/scripts.py +0 -614
  1089. {scitex-2.5.0.dist-info → scitex-2.7.0.dist-info}/WHEEL +0 -0
  1090. {scitex-2.5.0.dist-info → scitex-2.7.0.dist-info}/entry_points.txt +0 -0
  1091. {scitex-2.5.0.dist-info → scitex-2.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -4,6 +4,7 @@
4
4
  # File: ./src/scitex/scholar/_search_unified.py
5
5
  # ----------------------------------------
6
6
  import os
7
+
7
8
  __FILE__ = __file__
8
9
  __DIR__ = os.path.dirname(__FILE__)
9
10
  # ----------------------------------------
@@ -38,19 +39,20 @@ logger = logging.getLogger(__name__)
38
39
 
39
40
  class SearchEngine:
40
41
  """Base class for all search engines."""
41
-
42
+
42
43
  def __init__(self, name: str):
43
44
  self.name = name
44
45
  self.rate_limit = 0.1 # seconds between requests
45
46
  self._last_request = 0
46
-
47
+
47
48
  async def search_async(self, query: str, limit: int = 20, **kwargs) -> List[Paper]:
48
49
  """Search for papers. Must be implemented by subclasses."""
49
50
  raise NotImplementedError
50
-
51
+
51
52
  async def _rate_limit_async(self):
52
53
  """Enforce rate limiting."""
53
54
  import time
55
+
54
56
  now = time.time()
55
57
  elapsed = now - self._last_request
56
58
  if elapsed < self.rate_limit:
@@ -60,75 +62,76 @@ class SearchEngine:
60
62
 
61
63
  class SemanticScholarEngine(SearchEngine):
62
64
  """Semantic Scholar search engine."""
63
-
65
+
64
66
  def __init__(self, api_key: Optional[str] = None):
65
67
  super().__init__("semantic_scholar")
66
68
  self.api_key = api_key
67
69
  self.base_url = "https://api.semanticscholar.org/graph/v1"
68
70
  self.rate_limit = 0.1 if api_key else 1.0 # Faster with API key
69
-
70
-
71
+
71
72
  async def search_async(self, query: str, limit: int = 20, **kwargs) -> List[Paper]:
72
73
  """Search Semantic Scholar for papers."""
73
74
  await self._rate_limit_async()
74
-
75
+
75
76
  # Check if query is for a specific paper ID
76
- if query.startswith('CorpusId:'):
77
- corpus_id = query.replace('CorpusId:', '').strip()
77
+ if query.startswith("CorpusId:"):
78
+ corpus_id = query.replace("CorpusId:", "").strip()
78
79
  paper = await self._fetch_paper_by_id_async(f"CorpusId:{corpus_id}")
79
80
  return [paper] if paper else []
80
-
81
+
81
82
  headers = {}
82
83
  if self.api_key:
83
- headers['x-api-key'] = self.api_key
84
-
84
+ headers["x-api-key"] = self.api_key
85
+
85
86
  params = {
86
- 'query': query,
87
- 'limit': min(limit, 100),
88
- 'fields': 'title,authors,abstract,year,citationCount,journal,paperId,venue,fieldsOfStudy,isOpenAccess,url,tldr,doi,externalIds'
87
+ "query": query,
88
+ "limit": min(limit, 100),
89
+ "fields": "title,authors,abstract,year,citationCount,journal,paperId,venue,fieldsOfStudy,isOpenAccess,url,tldr,doi,externalIds",
89
90
  }
90
-
91
+
91
92
  # Add year filters if provided
92
- if 'year_min' in kwargs:
93
- params['year'] = f"{kwargs['year_min']}-"
94
- if 'year_max' in kwargs:
95
- if 'year' in params:
96
- params['year'] = f"{kwargs['year_min']}-{kwargs['year_max']}"
93
+ if "year_min" in kwargs:
94
+ params["year"] = f"{kwargs['year_min']}-"
95
+ if "year_max" in kwargs:
96
+ if "year" in params:
97
+ params["year"] = f"{kwargs['year_min']}-{kwargs['year_max']}"
97
98
  else:
98
- params['year'] = f"-{kwargs['year_max']}"
99
-
99
+ params["year"] = f"-{kwargs['year_max']}"
100
+
100
101
  papers = []
101
-
102
+
102
103
  try:
103
104
  async with aiohttp.ClientSession() as session:
104
105
  async with session.get(
105
- f"{self.base_url}/paper/search",
106
- params=params,
107
- headers=headers
106
+ f"{self.base_url}/paper/search", params=params, headers=headers
108
107
  ) as response:
109
108
  if response.status == 200:
110
109
  data = await response.json()
111
-
112
- for item in data.get('data', []):
110
+
111
+ for item in data.get("data", []):
113
112
  paper = self._parse_semantic_scholar_paper(item)
114
113
  if paper:
115
114
  papers.append(paper)
116
115
  else:
117
116
  error_msg = await response.text()
118
-
117
+
119
118
  if response.status == 429:
120
119
  # Rate limiting - show_async this to user
121
- logger.warning("Semantic Scholar rate limit reached. Please wait a moment or get a free API key at https://www.semanticscholar.org/product/api")
120
+ logger.warning(
121
+ "Semantic Scholar rate limit reached. Please wait a moment or get a free API key at https://www.semanticscholar.org/product/api"
122
+ )
122
123
  raise SearchError(
123
124
  query=query,
124
125
  source="semantic_scholar",
125
- reason="Rate limit reached. Please wait 1-2 seconds between searches or get a free API key."
126
+ reason="Rate limit reached. Please wait 1-2 seconds between searches or get a free API key.",
126
127
  )
127
128
  else:
128
129
  # Other errors - just log
129
- logger.debug(f"Semantic Scholar API returned {response.status}: {error_msg}")
130
+ logger.debug(
131
+ f"Semantic Scholar API returned {response.status}: {error_msg}"
132
+ )
130
133
  return []
131
-
134
+
132
135
  except SearchError:
133
136
  # Re-raise SearchError so user sees it
134
137
  raise
@@ -136,23 +139,23 @@ class SemanticScholarEngine(SearchEngine):
136
139
  logger.debug(f"Semantic Scholar search error: {e}")
137
140
  # Return empty list instead of raising to allow fallback to other sources
138
141
  return []
139
-
142
+
140
143
  return papers
141
-
144
+
142
145
  async def _fetch_paper_by_id_async(self, paper_id: str) -> Optional[Paper]:
143
146
  """Fetch a specific paper by its ID (CorpusId, DOI, arXiv ID, etc.)."""
144
147
  await self._rate_limit_async()
145
-
148
+
146
149
  headers = {}
147
150
  if self.api_key:
148
- headers['x-api-key'] = self.api_key
149
-
151
+ headers["x-api-key"] = self.api_key
152
+
150
153
  # Build URL for fetching paper by ID
151
154
  url = f"{self.base_url}/paper/{paper_id}"
152
155
  params = {
153
- 'fields': 'title,authors,abstract,year,citationCount,journal,paperId,venue,fieldsOfStudy,isOpenAccess,url,tldr,externalIds'
156
+ "fields": "title,authors,abstract,year,citationCount,journal,paperId,venue,fieldsOfStudy,isOpenAccess,url,tldr,externalIds"
154
157
  }
155
-
158
+
156
159
  try:
157
160
  async with aiohttp.ClientSession() as session:
158
161
  async with session.get(url, params=params, headers=headers) as response:
@@ -160,68 +163,73 @@ class SemanticScholarEngine(SearchEngine):
160
163
  data = await response.json()
161
164
  return self._parse_semantic_scholar_paper(data)
162
165
  else:
163
- logger.debug(f"Failed to fetch paper {paper_id}: {response.status}")
166
+ logger.debug(
167
+ f"Failed to fetch paper {paper_id}: {response.status}"
168
+ )
164
169
  return None
165
-
170
+
166
171
  except Exception as e:
167
172
  logger.debug(f"Error fetching paper {paper_id}: {e}")
168
173
  return None
169
-
174
+
170
175
  def _parse_semantic_scholar_paper(self, data: Dict[str, Any]) -> Optional[Paper]:
171
176
  """Parse Semantic Scholar paper data."""
172
177
  if not data or not isinstance(data, dict):
173
178
  logger.warning("Received None or non-dict data for Semantic Scholar paper")
174
179
  return None
175
-
180
+
176
181
  try:
177
182
  # Extract authors
178
183
  authors = []
179
- for author_data in data.get('authors', []):
180
- name = author_data.get('name', '')
184
+ for author_data in data.get("authors", []):
185
+ name = author_data.get("name", "")
181
186
  if name:
182
187
  authors.append(name)
183
-
188
+
184
189
  # Get PDF URL if available
185
190
  pdf_url = None
186
- if data.get('isOpenAccess'):
187
- pdf_url = data.get('url')
188
-
191
+ if data.get("isOpenAccess"):
192
+ pdf_url = data.get("url")
193
+
189
194
  # Extract journal/venue
190
- journal_data = data.get('journal')
191
- journal = ''
195
+ journal_data = data.get("journal")
196
+ journal = ""
192
197
  if journal_data and isinstance(journal_data, dict):
193
- journal = journal_data.get('name', '')
198
+ journal = journal_data.get("name", "")
194
199
  if not journal:
195
- journal = data.get('venue', '')
196
-
200
+ journal = data.get("venue", "")
201
+
197
202
  # Extract DOI from externalIds if not directly available
198
- doi = data.get('doi')
199
- if not doi and data.get('externalIds'):
200
- doi = data.get('externalIds', {}).get('DOI')
201
-
203
+ doi = data.get("doi")
204
+ if not doi and data.get("externalIds"):
205
+ doi = data.get("externalIds", {}).get("DOI")
206
+
202
207
  # Create paper
203
208
  paper = Paper(
204
- title=data.get('title', ''),
209
+ title=data.get("title", ""),
205
210
  authors=authors,
206
- abstract=data.get('abstract', '') or (data.get('tldr', {}) or {}).get('text', ''),
207
- year=data.get('year'),
211
+ abstract=data.get("abstract", "")
212
+ or (data.get("tldr", {}) or {}).get("text", ""),
213
+ year=data.get("year"),
208
214
  doi=doi,
209
215
  journal=journal,
210
- keywords=data.get('fieldsOfStudy', []),
211
- citation_count=data.get('citationCount', 0),
216
+ keywords=data.get("fieldsOfStudy", []),
217
+ citation_count=data.get("citationCount", 0),
212
218
  pdf_url=pdf_url,
213
219
  source="semantic_scholar",
214
220
  metadata={
215
- 'semantic_scholar_paper_id': data.get('paperId'),
216
- 'fields_of_study': data.get('fieldsOfStudy', []),
217
- 'is_open_access': data.get('isOpenAccess', False),
218
- 'citation_count_source': 'Semantic Scholar' if data.get('citationCount') is not None else None,
219
- 'external_ids': data.get('externalIds', {})
220
- }
221
+ "semantic_scholar_paper_id": data.get("paperId"),
222
+ "fields_of_study": data.get("fieldsOfStudy", []),
223
+ "is_open_access": data.get("isOpenAccess", False),
224
+ "citation_count_source": "Semantic Scholar"
225
+ if data.get("citationCount") is not None
226
+ else None,
227
+ "external_ids": data.get("externalIds", {}),
228
+ },
221
229
  )
222
-
230
+
223
231
  return paper
224
-
232
+
225
233
  except Exception as e:
226
234
  logger.warning(f"Failed to parse Semantic Scholar paper: {e}")
227
235
  return None
@@ -229,158 +237,165 @@ class SemanticScholarEngine(SearchEngine):
229
237
 
230
238
  class PubMedEngine(SearchEngine):
231
239
  """PubMed search engine using E-utilities."""
232
-
240
+
233
241
  def __init__(self, email: Optional[str] = None):
234
242
  super().__init__("pubmed")
235
243
  self.email = email or "research@example.com"
236
244
  self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
237
245
  self.rate_limit = 0.4 # NCBI rate limit
238
-
246
+
239
247
  async def search_async(self, query: str, limit: int = 20, **kwargs) -> List[Paper]:
240
248
  """Search PubMed for papers."""
241
249
  await self._rate_limit_async()
242
-
250
+
243
251
  # First, search for IDs
244
252
  search_params = {
245
- 'db': 'pubmed',
246
- 'term': query,
247
- 'retmax': limit,
248
- 'retmode': 'json',
249
- 'email': self.email,
250
- 'sort': 'relevance' # Sort by relevance instead of date to get diverse years
253
+ "db": "pubmed",
254
+ "term": query,
255
+ "retmax": limit,
256
+ "retmode": "json",
257
+ "email": self.email,
258
+ "sort": "relevance", # Sort by relevance instead of date to get diverse years
251
259
  }
252
-
260
+
253
261
  # Add date filters
254
- year_min = kwargs.get('year_min')
255
- year_max = kwargs.get('year_max')
262
+ year_min = kwargs.get("year_min")
263
+ year_max = kwargs.get("year_max")
256
264
  if year_min is not None or year_max is not None:
257
265
  min_date = f"{year_min or 1900}/01/01"
258
266
  max_date = f"{year_max or datetime.now().year}/12/31"
259
- search_params['mindate'] = min_date
260
- search_params['maxdate'] = max_date
261
- search_params['datetype'] = 'pdat' # Publication date
267
+ search_params["mindate"] = min_date
268
+ search_params["maxdate"] = max_date
269
+ search_params["datetype"] = "pdat" # Publication date
262
270
  else:
263
271
  # When no date range specified, search last 20 years to avoid only getting current year
264
272
  current_year = datetime.now().year
265
- search_params['mindate'] = f"{current_year - 20}/01/01"
266
- search_params['maxdate'] = f"{current_year}/12/31"
267
- search_params['datetype'] = 'pdat'
268
-
273
+ search_params["mindate"] = f"{current_year - 20}/01/01"
274
+ search_params["maxdate"] = f"{current_year}/12/31"
275
+ search_params["datetype"] = "pdat"
276
+
269
277
  papers = []
270
-
278
+
271
279
  try:
272
280
  async with aiohttp.ClientSession() as session:
273
281
  # Search for IDs
274
282
  logger.info(f"PubMed API URL: {self.base_url}/esearch.fcgi")
275
283
  logger.info(f"PubMed search params: {search_params}")
276
284
  async with session.get(
277
- f"{self.base_url}/esearch.fcgi",
278
- params=search_params
285
+ f"{self.base_url}/esearch.fcgi", params=search_params
279
286
  ) as response:
280
287
  if response.status == 200:
281
288
  data = await response.json()
282
- pmids = data.get('esearchresult', {}).get('idlist', [])
289
+ pmids = data.get("esearchresult", {}).get("idlist", [])
283
290
  logger.info(f"PubMed search returned {len(pmids)} PMIDs")
284
-
291
+
285
292
  if pmids:
286
293
  # Fetch details
287
- papers = await self._fetch_pubmed_details_async(session, pmids)
294
+ papers = await self._fetch_pubmed_details_async(
295
+ session, pmids
296
+ )
288
297
  else:
289
298
  logger.error(f"PubMed search failed: {response.status}")
290
-
299
+
291
300
  except Exception as e:
292
301
  logger.error(f"PubMed search error: {type(e).__name__}: {e}")
293
302
  import traceback
303
+
294
304
  logger.error(traceback.format_exc())
295
305
  # Return empty list instead of raising to allow other sources
296
306
  return []
297
-
307
+
298
308
  return papers
299
-
300
- async def _fetch_pubmed_details_async(self, session: aiohttp.ClientSession, pmids: List[str]) -> List[Paper]:
309
+
310
+ async def _fetch_pubmed_details_async(
311
+ self, session: aiohttp.ClientSession, pmids: List[str]
312
+ ) -> List[Paper]:
301
313
  """Fetch detailed information for PubMed IDs."""
302
314
  await self._rate_limit_async()
303
-
315
+
304
316
  fetch_params = {
305
- 'db': 'pubmed',
306
- 'id': ','.join(pmids),
307
- 'retmode': 'xml',
308
- 'email': self.email
317
+ "db": "pubmed",
318
+ "id": ",".join(pmids),
319
+ "retmode": "xml",
320
+ "email": self.email,
309
321
  }
310
-
322
+
311
323
  papers = []
312
-
324
+
313
325
  async with session.get(
314
- f"{self.base_url}/efetch.fcgi",
315
- params=fetch_params
326
+ f"{self.base_url}/efetch.fcgi", params=fetch_params
316
327
  ) as response:
317
328
  if response.status == 200:
318
329
  xml_data = await response.text()
319
330
  papers = self._parse_pubmed_xml(xml_data)
320
331
  else:
321
332
  logger.error(f"PubMed fetch failed: {response.status}")
322
-
333
+
323
334
  return papers
324
-
335
+
325
336
  def _parse_pubmed_xml(self, xml_data: str) -> List[Paper]:
326
337
  """Parse PubMed XML response."""
327
338
  papers = []
328
-
339
+
329
340
  try:
330
341
  root = ET.fromstring(xml_data)
331
-
332
- for article_elem in root.findall('.//PubmedArticle'):
342
+
343
+ for article_elem in root.findall(".//PubmedArticle"):
333
344
  try:
334
345
  # Extract article data
335
- medline = article_elem.find('.//MedlineCitation')
346
+ medline = article_elem.find(".//MedlineCitation")
336
347
  if medline is None:
337
348
  continue
338
-
349
+
339
350
  # Title
340
- title_elem = medline.find('.//ArticleTitle')
341
- title = title_elem.text if title_elem is not None else ''
342
-
351
+ title_elem = medline.find(".//ArticleTitle")
352
+ title = title_elem.text if title_elem is not None else ""
353
+
343
354
  # Authors
344
355
  authors = []
345
- for author_elem in medline.findall('.//Author'):
346
- last_name = author_elem.findtext('LastName', '')
347
- first_name = author_elem.findtext('ForeName', '')
356
+ for author_elem in medline.findall(".//Author"):
357
+ last_name = author_elem.findtext("LastName", "")
358
+ first_name = author_elem.findtext("ForeName", "")
348
359
  if last_name:
349
- name = f"{last_name}, {first_name}" if first_name else last_name
360
+ name = (
361
+ f"{last_name}, {first_name}"
362
+ if first_name
363
+ else last_name
364
+ )
350
365
  authors.append(name)
351
-
366
+
352
367
  # Abstract
353
368
  abstract_parts = []
354
- for abstract_elem in medline.findall('.//AbstractText'):
355
- text = abstract_elem.text or ''
369
+ for abstract_elem in medline.findall(".//AbstractText"):
370
+ text = abstract_elem.text or ""
356
371
  abstract_parts.append(text)
357
- abstract = ' '.join(abstract_parts)
358
-
372
+ abstract = " ".join(abstract_parts)
373
+
359
374
  # Year
360
- year_elem = medline.find('.//PubDate/Year')
375
+ year_elem = medline.find(".//PubDate/Year")
361
376
  year = year_elem.text if year_elem is not None else None
362
-
377
+
363
378
  # Journal
364
- journal_elem = medline.find('.//Journal/Title')
365
- journal = journal_elem.text if journal_elem is not None else ''
366
-
379
+ journal_elem = medline.find(".//Journal/Title")
380
+ journal = journal_elem.text if journal_elem is not None else ""
381
+
367
382
  # PMID
368
- pmid_elem = medline.find('.//PMID')
369
- pmid = pmid_elem.text if pmid_elem is not None else ''
370
-
383
+ pmid_elem = medline.find(".//PMID")
384
+ pmid = pmid_elem.text if pmid_elem is not None else ""
385
+
371
386
  # DOI
372
387
  doi = None
373
- for id_elem in article_elem.findall('.//ArticleId'):
374
- if id_elem.get('IdType') == 'doi':
388
+ for id_elem in article_elem.findall(".//ArticleId"):
389
+ if id_elem.get("IdType") == "doi":
375
390
  doi = id_elem.text
376
391
  break
377
-
392
+
378
393
  # Keywords
379
394
  keywords = []
380
- for kw_elem in medline.findall('.//MeshHeading/DescriptorName'):
395
+ for kw_elem in medline.findall(".//MeshHeading/DescriptorName"):
381
396
  if kw_elem.text:
382
397
  keywords.append(kw_elem.text)
383
-
398
+
384
399
  paper = Paper(
385
400
  title=title,
386
401
  authors=authors,
@@ -390,43 +405,43 @@ class PubMedEngine(SearchEngine):
390
405
  pmid=pmid,
391
406
  journal=journal,
392
407
  keywords=keywords,
393
- source="pubmed"
408
+ source="pubmed",
394
409
  )
395
-
410
+
396
411
  papers.append(paper)
397
-
412
+
398
413
  except Exception as e:
399
414
  logger.warning(f"Failed to parse PubMed article: {e}")
400
415
  continue
401
-
416
+
402
417
  except Exception as e:
403
418
  logger.error(f"Failed to parse PubMed XML: {e}")
404
-
419
+
405
420
  return papers
406
421
 
407
422
 
408
423
  class ArxivEngine(SearchEngine):
409
424
  """arXiv search engine."""
410
-
425
+
411
426
  def __init__(self):
412
427
  super().__init__("arxiv")
413
428
  self.base_url = "http://export.arxiv.org/api/query"
414
429
  self.rate_limit = 0.5
415
-
430
+
416
431
  async def search_async(self, query: str, limit: int = 20, **kwargs) -> List[Paper]:
417
432
  """Search arXiv for papers."""
418
433
  await self._rate_limit_async()
419
-
434
+
420
435
  params = {
421
- 'search_query': f'all:{query}',
422
- 'start': 0,
423
- 'max_results': limit,
424
- 'sortBy': 'relevance',
425
- 'sortOrder': 'descending'
436
+ "search_query": f"all:{query}",
437
+ "start": 0,
438
+ "max_results": limit,
439
+ "sortBy": "relevance",
440
+ "sortOrder": "descending",
426
441
  }
427
-
442
+
428
443
  papers = []
429
-
444
+
430
445
  try:
431
446
  async with aiohttp.ClientSession() as session:
432
447
  async with session.get(self.base_url, params=params) as response:
@@ -435,61 +450,63 @@ class ArxivEngine(SearchEngine):
435
450
  papers = self._parse_arxiv_xml(xml_data)
436
451
  else:
437
452
  logger.error(f"arXiv search failed: {response.status}")
438
-
453
+
439
454
  except Exception as e:
440
455
  logger.error(f"arXiv search error: {e}")
441
456
  raise SearchError(query, "arXiv", str(e))
442
-
457
+
443
458
  return papers
444
-
459
+
445
460
  def _parse_arxiv_xml(self, xml_data: str) -> List[Paper]:
446
461
  """Parse arXiv XML response."""
447
462
  papers = []
448
-
463
+
449
464
  try:
450
465
  # Parse XML with namespace
451
466
  root = ET.fromstring(xml_data)
452
- ns = {'atom': 'http://www.w3.org/2005/Atom'}
453
-
454
- for entry in root.findall('atom:entry', ns):
467
+ ns = {"atom": "http://www.w3.org/2005/Atom"}
468
+
469
+ for entry in root.findall("atom:entry", ns):
455
470
  try:
456
471
  # Title
457
- title_elem = entry.find('atom:title', ns)
458
- title = title_elem.text.strip() if title_elem is not None else ''
459
-
472
+ title_elem = entry.find("atom:title", ns)
473
+ title = title_elem.text.strip() if title_elem is not None else ""
474
+
460
475
  # Authors
461
476
  authors = []
462
- for author_elem in entry.findall('atom:author', ns):
463
- name_elem = author_elem.find('atom:name', ns)
477
+ for author_elem in entry.findall("atom:author", ns):
478
+ name_elem = author_elem.find("atom:name", ns)
464
479
  if name_elem is not None and name_elem.text:
465
480
  authors.append(name_elem.text)
466
-
481
+
467
482
  # Abstract
468
- summary_elem = entry.find('atom:summary', ns)
469
- abstract = summary_elem.text.strip() if summary_elem is not None else ''
470
-
483
+ summary_elem = entry.find("atom:summary", ns)
484
+ abstract = (
485
+ summary_elem.text.strip() if summary_elem is not None else ""
486
+ )
487
+
471
488
  # Year
472
- published_elem = entry.find('atom:published', ns)
489
+ published_elem = entry.find("atom:published", ns)
473
490
  year = None
474
491
  if published_elem is not None and published_elem.text:
475
492
  year = published_elem.text[:4]
476
-
493
+
477
494
  # arXiv ID
478
- id_elem = entry.find('atom:id', ns)
495
+ id_elem = entry.find("atom:id", ns)
479
496
  arxiv_id = None
480
497
  pdf_url = None
481
498
  if id_elem is not None and id_elem.text:
482
499
  # Extract ID from URL
483
- arxiv_id = id_elem.text.split('/')[-1]
500
+ arxiv_id = id_elem.text.split("/")[-1]
484
501
  pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
485
-
502
+
486
503
  # Categories (as keywords)
487
504
  keywords = []
488
- for cat_elem in entry.findall('atom:category', ns):
489
- term = cat_elem.get('term')
505
+ for cat_elem in entry.findall("atom:category", ns):
506
+ term = cat_elem.get("term")
490
507
  if term:
491
508
  keywords.append(term)
492
-
509
+
493
510
  paper = Paper(
494
511
  title=title,
495
512
  authors=authors,
@@ -498,146 +515,140 @@ class ArxivEngine(SearchEngine):
498
515
  arxiv_id=arxiv_id,
499
516
  keywords=keywords,
500
517
  pdf_url=pdf_url,
501
- source="arxiv"
518
+ source="arxiv",
502
519
  )
503
-
520
+
504
521
  papers.append(paper)
505
-
522
+
506
523
  except Exception as e:
507
524
  logger.warning(f"Failed to parse arXiv entry: {e}")
508
525
  continue
509
-
526
+
510
527
  except Exception as e:
511
528
  logger.error(f"Failed to parse arXiv XML: {e}")
512
-
529
+
513
530
  return papers
514
531
 
515
532
 
516
533
  class CrossRefEngine(SearchEngine):
517
534
  """CrossRef search engine for academic papers."""
518
-
535
+
519
536
  def __init__(self, api_key: Optional[str] = None, email: Optional[str] = None):
520
537
  super().__init__("crossref")
521
538
  self.api_key = api_key
522
539
  self.email = email or "research@example.com"
523
540
  self.base_url = "https://api.crossref.org/works"
524
541
  self.rate_limit = 0.5 # CrossRef recommends 50ms between requests
525
-
542
+
526
543
  async def search_async(self, query: str, limit: int = 20, **kwargs) -> List[Paper]:
527
544
  """Search CrossRef for papers."""
528
545
  await self._rate_limit_async()
529
-
546
+
530
547
  # Build query parameters
531
548
  params = {
532
- 'query': query,
533
- 'rows': min(limit, 1000), # CrossRef max is 1000
534
- 'sort': 'relevance',
535
- 'order': 'desc'
549
+ "query": query,
550
+ "rows": min(limit, 1000), # CrossRef max is 1000
551
+ "sort": "relevance",
552
+ "order": "desc",
536
553
  }
537
-
554
+
538
555
  # Add filters for year if provided
539
556
  filters = []
540
- if 'year_min' in kwargs and kwargs['year_min'] is not None:
557
+ if "year_min" in kwargs and kwargs["year_min"] is not None:
541
558
  filters.append(f"from-pub-date:{kwargs['year_min']}")
542
- if 'year_max' in kwargs and kwargs['year_max'] is not None:
559
+ if "year_max" in kwargs and kwargs["year_max"] is not None:
543
560
  filters.append(f"until-pub-date:{kwargs['year_max']}")
544
-
561
+
545
562
  if filters:
546
- params['filter'] = ','.join(filters)
547
-
563
+ params["filter"] = ",".join(filters)
564
+
548
565
  # Add API key if available
549
566
  if self.api_key:
550
- params['key'] = self.api_key
551
-
567
+ params["key"] = self.api_key
568
+
552
569
  # Headers with user agent
553
- headers = {
554
- 'User-Agent': f'SciTeX/1.0 (mailto:{self.email})'
555
- }
556
-
570
+ headers = {"User-Agent": f"SciTeX/1.0 (mailto:{self.email})"}
571
+
557
572
  papers = []
558
-
573
+
559
574
  try:
560
575
  async with aiohttp.ClientSession() as session:
561
- async with session.get(self.base_url, params=params, headers=headers) as response:
576
+ async with session.get(
577
+ self.base_url, params=params, headers=headers
578
+ ) as response:
562
579
  if response.status == 200:
563
580
  data = await response.json()
564
581
  papers = self._parse_crossref_response(data)
565
582
  else:
566
583
  error_text = await response.text()
567
- logger.error(f"CrossRef search failed: {response.status} - {error_text}")
584
+ logger.error(
585
+ f"CrossRef search failed: {response.status} - {error_text}"
586
+ )
568
587
  raise SearchError(
569
588
  query=query,
570
589
  source="crossref",
571
- reason=f"API returned status {response.status}"
590
+ reason=f"API returned status {response.status}",
572
591
  )
573
-
592
+
574
593
  except asyncio.TimeoutError:
575
594
  logger.error("CrossRef search timed out")
576
- raise SearchError(
577
- query=query,
578
- source="crossref",
579
- reason="Search timed out"
580
- )
595
+ raise SearchError(query=query, source="crossref", reason="Search timed out")
581
596
  except Exception as e:
582
597
  logger.error(f"CrossRef search error: {e}")
583
- raise SearchError(
584
- query=query,
585
- source="crossref",
586
- reason=str(e)
587
- )
588
-
598
+ raise SearchError(query=query, source="crossref", reason=str(e))
599
+
589
600
  return papers
590
-
601
+
591
602
  def _parse_crossref_response(self, data: Dict[str, Any]) -> List[Paper]:
592
603
  """Parse CrossRef API response into Paper objects."""
593
604
  papers = []
594
-
595
- items = data.get('message', {}).get('items', [])
596
-
605
+
606
+ items = data.get("message", {}).get("items", [])
607
+
597
608
  for item in items:
598
609
  try:
599
610
  # Extract basic metadata
600
- title = ' '.join(item.get('title', ['No title']))
601
-
611
+ title = " ".join(item.get("title", ["No title"]))
612
+
602
613
  # Authors
603
614
  authors = []
604
- for author in item.get('author', []):
605
- given = author.get('given', '')
606
- family = author.get('family', '')
615
+ for author in item.get("author", []):
616
+ given = author.get("given", "")
617
+ family = author.get("family", "")
607
618
  if given and family:
608
619
  authors.append(f"{given} {family}")
609
620
  elif family:
610
621
  authors.append(family)
611
-
622
+
612
623
  # Abstract - CrossRef doesn't always have abstracts
613
- abstract = item.get('abstract', '')
614
-
624
+ abstract = item.get("abstract", "")
625
+
615
626
  # Year from published-print or published-online
616
627
  year = None
617
- published = item.get('published-print') or item.get('published-online')
618
- if published and 'date-parts' in published:
619
- date_parts = published['date-parts']
628
+ published = item.get("published-print") or item.get("published-online")
629
+ if published and "date-parts" in published:
630
+ date_parts = published["date-parts"]
620
631
  if date_parts and date_parts[0]:
621
632
  year = str(date_parts[0][0])
622
-
633
+
623
634
  # Journal
624
635
  journal = None
625
- container_title = item.get('container-title', [])
636
+ container_title = item.get("container-title", [])
626
637
  if container_title:
627
638
  journal = container_title[0]
628
-
639
+
629
640
  # DOI
630
- doi = item.get('DOI')
631
-
641
+ doi = item.get("DOI")
642
+
632
643
  # Citation count
633
- citation_count = item.get('is-referenced-by-count', 0)
634
-
644
+ citation_count = item.get("is-referenced-by-count", 0)
645
+
635
646
  # Keywords/subjects
636
- keywords = item.get('subject', [])
637
-
647
+ keywords = item.get("subject", [])
648
+
638
649
  # URL
639
- url = item.get('URL')
640
-
650
+ url = item.get("URL")
651
+
641
652
  paper = Paper(
642
653
  title=title,
643
654
  authors=authors,
@@ -649,38 +660,39 @@ class CrossRefEngine(SearchEngine):
649
660
  citation_count=citation_count,
650
661
  source="crossref",
651
662
  metadata={
652
- 'citation_count_source': 'CrossRef',
653
- 'url': url,
654
- 'publisher': item.get('publisher'),
655
- 'issn': item.get('ISSN', []),
656
- 'type': item.get('type'),
657
- 'score': item.get('score')
658
- }
663
+ "citation_count_source": "CrossRef",
664
+ "url": url,
665
+ "publisher": item.get("publisher"),
666
+ "issn": item.get("ISSN", []),
667
+ "type": item.get("type"),
668
+ "score": item.get("score"),
669
+ },
659
670
  )
660
-
671
+
661
672
  papers.append(paper)
662
-
673
+
663
674
  except Exception as e:
664
675
  logger.warning(f"Failed to parse CrossRef item: {e}")
665
676
  continue
666
-
677
+
667
678
  return papers
668
679
 
669
680
 
670
681
  class GoogleScholarEngine(SearchEngine):
671
682
  """Search engine for Google Scholar using scholarly package."""
672
-
683
+
673
684
  def __init__(self, timeout: int = 10):
674
685
  super().__init__("google_scholar")
675
686
  self.rate_limit = 2.0 # Be respectful to Google Scholar
676
687
  self._scholarly = None
677
688
  self.timeout = timeout
678
-
689
+
679
690
  def _init_scholarly(self):
680
691
  """Lazy load scholarly package."""
681
692
  if self._scholarly is None:
682
693
  try:
683
694
  from scholarly import scholarly
695
+
684
696
  self._scholarly = scholarly
685
697
  # Configure proxy to avoid blocking (optional)
686
698
  # from scholarly import ProxyGenerator
@@ -693,25 +705,25 @@ class GoogleScholarEngine(SearchEngine):
693
705
  "Install with: pip install scholarly"
694
706
  )
695
707
  return self._scholarly
696
-
708
+
697
709
  async def search_async(self, query: str, limit: int = 20, **kwargs) -> List[Paper]:
698
710
  """
699
711
  Search Google Scholar for papers.
700
-
712
+
701
713
  Args:
702
714
  query: Search query
703
715
  limit: Maximum number of results
704
716
  **kwargs: Additional parameters (year_min, year_max)
705
-
717
+
706
718
  Returns:
707
719
  List of Paper objects
708
720
  """
709
721
  papers = []
710
-
722
+
711
723
  try:
712
724
  # Initialize scholarly
713
725
  scholarly = self._init_scholarly()
714
-
726
+
715
727
  # Quick test to see if Google Scholar is accessible
716
728
  try:
717
729
  # Try a minimal search to detect blocking immediately
@@ -723,27 +735,27 @@ class GoogleScholarEngine(SearchEngine):
723
735
  raise SearchError(
724
736
  query=query,
725
737
  source="google_scholar",
726
- reason="Google Scholar is blocking automated access. Use PubMed or Semantic Scholar instead."
738
+ reason="Google Scholar is blocking automated access. Use PubMed or Semantic Scholar instead.",
727
739
  )
728
-
740
+
729
741
  # Apply year filters if provided
730
- year_min = kwargs.get('year_min')
731
- year_max = kwargs.get('year_max')
732
-
742
+ year_min = kwargs.get("year_min")
743
+ year_max = kwargs.get("year_max")
744
+
733
745
  # Build query with year filters
734
746
  search_query = query
735
747
  if year_min and year_max:
736
- search_query += f" after:{year_min-1} before:{year_max+1}"
748
+ search_query += f" after:{year_min - 1} before:{year_max + 1}"
737
749
  elif year_min:
738
- search_query += f" after:{year_min-1}"
750
+ search_query += f" after:{year_min - 1}"
739
751
  elif year_max:
740
- search_query += f" before:{year_max+1}"
741
-
752
+ search_query += f" before:{year_max + 1}"
753
+
742
754
  logger.info(f"Searching Google Scholar: {search_query}")
743
-
755
+
744
756
  # Run search in executor with timeout to avoid blocking
745
757
  loop = asyncio.get_event_loop()
746
-
758
+
747
759
  def search_with_limit():
748
760
  """Search and limit results to avoid hanging."""
749
761
  results = []
@@ -757,222 +769,231 @@ class GoogleScholarEngine(SearchEngine):
757
769
  logger.warning(f"Google Scholar search interrupted: {e}")
758
770
  # Common error from scholarly when blocked
759
771
  if "Cannot Fetch" in str(e) or "403" in str(e):
760
- logger.info("Google Scholar is blocking automated requests. This is common due to anti-bot measures.")
772
+ logger.info(
773
+ "Google Scholar is blocking automated requests. This is common due to anti-bot measures."
774
+ )
761
775
  return results
762
-
776
+
763
777
  # Apply timeout to prevent hanging
764
778
  try:
765
779
  search_results = await asyncio.wait_for(
766
- loop.run_in_executor(None, search_with_limit),
767
- timeout=self.timeout
780
+ loop.run_in_executor(None, search_with_limit), timeout=self.timeout
768
781
  )
769
782
  except asyncio.TimeoutError:
770
- logger.warning(f"Google Scholar search timed out after {self.timeout} seconds")
783
+ logger.warning(
784
+ f"Google Scholar search timed out after {self.timeout} seconds"
785
+ )
771
786
  raise SearchError(
772
787
  query=search_query,
773
788
  source="google_scholar",
774
- reason=f"Search timed out after {self.timeout} seconds. Google Scholar may be blocking requests. You can increase timeout with SCITEX_SCHOLAR_GOOGLE_SCHOLAR_TIMEOUT environment variable."
789
+ reason=f"Search timed out after {self.timeout} seconds. Google Scholar may be blocking requests. You can increase timeout with SCITEX_SCHOLAR_GOOGLE_SCHOLAR_TIMEOUT environment variable.",
775
790
  )
776
-
791
+
777
792
  # Process results
778
793
  for result in search_results:
779
-
780
794
  try:
781
795
  # Extract basic info from search result
782
- bib = result.get('bib', {})
783
-
784
- title = bib.get('title', '')
796
+ bib = result.get("bib", {})
797
+
798
+ title = bib.get("title", "")
785
799
  if not title:
786
800
  continue
787
-
801
+
788
802
  # Authors
789
- authors = bib.get('author', '').split(' and ')
790
- if not authors or authors == ['']:
803
+ authors = bib.get("author", "").split(" and ")
804
+ if not authors or authors == [""]:
791
805
  authors = []
792
-
806
+
793
807
  # Abstract (often not available in search results)
794
- abstract = bib.get('abstract', '')
795
-
808
+ abstract = bib.get("abstract", "")
809
+
796
810
  # Year
797
811
  year = None
798
- pub_year = bib.get('pub_year')
812
+ pub_year = bib.get("pub_year")
799
813
  if pub_year:
800
814
  try:
801
815
  year = int(pub_year)
802
816
  except:
803
817
  pass
804
-
818
+
805
819
  # Journal/Venue
806
- journal = bib.get('venue', '')
807
-
820
+ journal = bib.get("venue", "")
821
+
808
822
  # Citation count
809
- citation_count = result.get('num_citations', 0)
810
-
823
+ citation_count = result.get("num_citations", 0)
824
+
811
825
  # URL
812
- url = result.get('pub_url', '')
813
-
826
+ url = result.get("pub_url", "")
827
+
814
828
  # Try to extract DOI from URL or other fields
815
829
  doi = None
816
- if 'doi.org/' in url:
817
- doi = url.split('doi.org/')[-1]
818
-
830
+ if "doi.org/" in url:
831
+ doi = url.split("doi.org/")[-1]
832
+
819
833
  # Create Paper object
820
834
  paper = Paper(
821
835
  title=title,
822
836
  authors=authors,
823
- abstract=abstract or "Abstract not available from Google Scholar search",
837
+ abstract=abstract
838
+ or "Abstract not available from Google Scholar search",
824
839
  year=year,
825
840
  journal=journal,
826
841
  doi=doi,
827
842
  citation_count=citation_count,
828
843
  source="google_scholar",
829
844
  metadata={
830
- 'google_scholar_url': url,
831
- 'google_scholar_id': result.get('author_id', ''),
832
- 'eprint_url': result.get('eprint_url', '')
833
- }
845
+ "google_scholar_url": url,
846
+ "google_scholar_id": result.get("author_id", ""),
847
+ "eprint_url": result.get("eprint_url", ""),
848
+ },
834
849
  )
835
-
850
+
836
851
  papers.append(paper)
837
-
852
+
838
853
  except Exception as e:
839
854
  logger.warning(f"Failed to parse Google Scholar result: {e}")
840
855
  continue
841
-
856
+
842
857
  except ImportError as e:
843
858
  logger.error(f"Google Scholar search unavailable: {e}")
844
859
  raise SearchError(
845
860
  query=query,
846
861
  source="google_scholar",
847
- reason=f"Google Scholar search unavailable: {e}"
862
+ reason=f"Google Scholar search unavailable: {e}",
848
863
  )
849
864
  except Exception as e:
850
865
  logger.error(f"Google Scholar search failed: {e}")
851
- if "robot" in str(e).lower() or "captcha" in str(e).lower() or "Cannot Fetch" in str(e):
866
+ if (
867
+ "robot" in str(e).lower()
868
+ or "captcha" in str(e).lower()
869
+ or "Cannot Fetch" in str(e)
870
+ ):
852
871
  raise SearchError(
853
872
  query=query,
854
873
  source="google_scholar",
855
- reason="Google Scholar is blocking automated access. Consider using PubMed or Semantic Scholar instead, or configure a proxy in the scholarly package."
874
+ reason="Google Scholar is blocking automated access. Consider using PubMed or Semantic Scholar instead, or configure a proxy in the scholarly package.",
856
875
  )
857
876
  raise SearchError(
858
- query=query,
859
- source="google_scholar",
860
- reason=f"Search failed: {e}"
877
+ query=query, source="google_scholar", reason=f"Search failed: {e}"
861
878
  )
862
-
879
+
863
880
  logger.info(f"Found {len(papers)} papers from Google Scholar")
864
881
  return papers
865
882
 
866
883
 
867
884
  class LocalSearchEngine(SearchEngine):
868
885
  """Search engine for local PDF files."""
869
-
886
+
870
887
  def __init__(self, index_path: Optional[Path] = None):
871
888
  super().__init__("local")
872
889
  self.index_path = index_path or get_scholar_dir() / "local_index.json"
873
890
  self.index = self._load_index()
874
-
891
+
875
892
  async def search_async(self, query: str, limit: int = 20, **kwargs) -> List[Paper]:
876
893
  """Search local PDF collection."""
877
894
  # Local search is synchronous, wrap in async
878
895
  return await asyncio.to_thread(self._search_sync, query, limit, kwargs)
879
-
896
+
880
897
  def _search_sync(self, query: str, limit: int, kwargs: dict) -> List[Paper]:
881
898
  """Synchronous local search implementation."""
882
899
  if not self.index:
883
900
  return []
884
-
901
+
885
902
  # Simple keyword matching
886
903
  query_terms = query.lower().split()
887
904
  scored_papers = []
888
-
905
+
889
906
  for paper_data in self.index.values():
890
907
  # Calculate relevance score
891
908
  score = 0
892
909
  searchable_text = f"{paper_data.get('title', '')} {paper_data.get('abstract', '')} {' '.join(paper_data.get('keywords', []))}".lower()
893
-
910
+
894
911
  for term in query_terms:
895
912
  score += searchable_text.count(term)
896
-
913
+
897
914
  if score > 0:
898
915
  # Create Paper object
899
916
  paper = Paper(
900
- title=paper_data.get('title', 'Unknown Title'),
901
- authors=paper_data.get('authors', []),
902
- abstract=paper_data.get('abstract', ''),
903
- year=paper_data.get('year'),
904
- keywords=paper_data.get('keywords', []),
905
- pdf_path=Path(paper_data.get('pdf_path', '')),
906
- source="local"
917
+ title=paper_data.get("title", "Unknown Title"),
918
+ authors=paper_data.get("authors", []),
919
+ abstract=paper_data.get("abstract", ""),
920
+ year=paper_data.get("year"),
921
+ keywords=paper_data.get("keywords", []),
922
+ pdf_path=Path(paper_data.get("pdf_path", "")),
923
+ source="local",
907
924
  )
908
925
  scored_papers.append((score, paper))
909
-
926
+
910
927
  # Sort by score and return top results
911
928
  scored_papers.sort(key=lambda x: x[0], reverse=True)
912
929
  return [paper for score, paper in scored_papers[:limit]]
913
-
930
+
914
931
  def _load_index(self) -> Dict[str, Any]:
915
932
  """Load local search index."""
916
933
  if self.index_path.exists():
917
934
  try:
918
- with open(self.index_path, 'r') as f:
935
+ with open(self.index_path, "r") as f:
919
936
  return json.load(f)
920
937
  except Exception as e:
921
938
  logger.warning(f"Failed to load local index: {e}")
922
939
  return {}
923
-
940
+
924
941
  def build_index(self, pdf_dirs: List[Path]) -> Dict[str, Any]:
925
942
  """Build search index from PDF directories."""
926
943
  logger.info(f"Building local index from {len(pdf_dirs)} directories")
927
-
944
+
928
945
  index = {}
929
- stats = {'files_indexed': 0, 'errors': 0}
930
-
946
+ stats = {"files_indexed": 0, "errors": 0}
947
+
931
948
  for pdf_dir in pdf_dirs:
932
949
  if not pdf_dir.exists():
933
950
  continue
934
-
951
+
935
952
  for pdf_path in pdf_dir.rglob("*.pdf"):
936
953
  try:
937
954
  # Extract text and metadata
938
955
  paper_data = self._extract_pdf_metadata(pdf_path)
939
956
  if paper_data:
940
957
  index[str(pdf_path)] = paper_data
941
- stats['files_indexed'] += 1
958
+ stats["files_indexed"] += 1
942
959
  except Exception as e:
943
960
  logger.warning(f"Failed to index {pdf_path}: {e}")
944
- stats['errors'] += 1
945
-
961
+ stats["errors"] += 1
962
+
946
963
  # Save index
947
964
  self.index = index
948
965
  self._save_index()
949
-
950
- logger.info(f"Indexed {stats['files_indexed']} files with {stats['errors']} errors")
966
+
967
+ logger.info(
968
+ f"Indexed {stats['files_indexed']} files with {stats['errors']} errors"
969
+ )
951
970
  return stats
952
-
971
+
953
972
  def _extract_pdf_metadata(self, pdf_path: Path) -> Optional[Dict[str, Any]]:
954
973
  """Extract metadata from PDF file."""
955
974
  # This is a placeholder - in real implementation would use PyPDF2 or similar
956
975
  return {
957
- 'title': pdf_path.stem.replace('_', ' ').title(),
958
- 'authors': [],
959
- 'abstract': '',
960
- 'year': None,
961
- 'keywords': [],
962
- 'pdf_path': str(pdf_path)
976
+ "title": pdf_path.stem.replace("_", " ").title(),
977
+ "authors": [],
978
+ "abstract": "",
979
+ "year": None,
980
+ "keywords": [],
981
+ "pdf_path": str(pdf_path),
963
982
  }
964
-
983
+
965
984
  def _save_index(self) -> None:
966
985
  """Save index to disk."""
967
986
  self.index_path.parent.mkdir(parents=True, exist_ok=True)
968
- with open(self.index_path, 'w') as f:
987
+ with open(self.index_path, "w") as f:
969
988
  json.dump(self.index, f, indent=2)
970
989
 
971
990
 
972
991
  class VectorSearchEngine(SearchEngine):
973
992
  """Vector similarity search using sentence embeddings."""
974
-
975
- def __init__(self, index_path: Optional[Path] = None, model_name: str = "all-MiniLM-L6-v2"):
993
+
994
+ def __init__(
995
+ self, index_path: Optional[Path] = None, model_name: str = "all-MiniLM-L6-v2"
996
+ ):
976
997
  super().__init__("vector")
977
998
  self.index_path = index_path or get_scholar_dir() / "vector_index.pkl"
978
999
  self.model_name = model_name
@@ -980,92 +1001,94 @@ class VectorSearchEngine(SearchEngine):
980
1001
  self._papers = []
981
1002
  self._embeddings = None
982
1003
  self._load_index()
983
-
1004
+
984
1005
  async def search_async(self, query: str, limit: int = 20, **kwargs) -> List[Paper]:
985
1006
  """Search using vector similarity."""
986
1007
  # Vector search is CPU-bound, use thread
987
1008
  return await asyncio.to_thread(self._search_sync, query, limit)
988
-
1009
+
989
1010
  def _search_sync(self, query: str, limit: int) -> List[Paper]:
990
1011
  """Synchronous vector search implementation."""
991
1012
  if not self._embeddings or not self._papers:
992
1013
  return []
993
-
1014
+
994
1015
  # Lazy load model
995
1016
  if self._model is None:
996
1017
  self._load_model()
997
-
1018
+
998
1019
  # Encode query
999
1020
  query_embedding = self._model.encode([query])[0]
1000
-
1021
+
1001
1022
  # Calculate similarities
1002
1023
  import numpy as np
1024
+
1003
1025
  similarities = np.dot(self._embeddings, query_embedding)
1004
-
1026
+
1005
1027
  # Get top results
1006
1028
  top_indices = np.argsort(similarities)[-limit:][::-1]
1007
-
1029
+
1008
1030
  results = []
1009
1031
  for idx in top_indices:
1010
1032
  if idx < len(self._papers):
1011
1033
  results.append(self._papers[idx])
1012
-
1034
+
1013
1035
  return results
1014
-
1036
+
1015
1037
  def add_papers(self, papers: List[Paper]) -> None:
1016
1038
  """Add papers to vector index."""
1017
1039
  if self._model is None:
1018
1040
  self._load_model()
1019
-
1041
+
1020
1042
  # Create searchable text for each paper
1021
1043
  texts = []
1022
1044
  for paper in papers:
1023
1045
  text = f"{paper.title} {paper.abstract}"
1024
1046
  texts.append(text)
1025
-
1047
+
1026
1048
  # Encode papers
1027
1049
  new_embeddings = self._model.encode(texts)
1028
-
1050
+
1029
1051
  # Add to index
1030
1052
  import numpy as np
1053
+
1031
1054
  if self._embeddings is None:
1032
1055
  self._embeddings = new_embeddings
1033
1056
  self._papers = papers
1034
1057
  else:
1035
1058
  self._embeddings = np.vstack([self._embeddings, new_embeddings])
1036
1059
  self._papers.extend(papers)
1037
-
1060
+
1038
1061
  # Save index
1039
1062
  self._save_index()
1040
-
1063
+
1041
1064
  def _load_model(self) -> None:
1042
1065
  """Load sentence transformer model."""
1043
1066
  try:
1044
1067
  from sentence_transformers import SentenceTransformer
1068
+
1045
1069
  self._model = SentenceTransformer(self.model_name)
1046
1070
  except ImportError:
1047
- logger.warning("sentence-transformers not installed. Vector search disabled.")
1071
+ logger.warning(
1072
+ "sentence-transformers not installed. Vector search disabled."
1073
+ )
1048
1074
  self._model = None
1049
-
1075
+
1050
1076
  def _load_index(self) -> None:
1051
1077
  """Load vector index from disk."""
1052
1078
  if self.index_path.exists():
1053
1079
  try:
1054
- with open(self.index_path, 'rb') as f:
1080
+ with open(self.index_path, "rb") as f:
1055
1081
  data = pickle.load(f)
1056
- self._papers = data.get('papers', [])
1057
- self._embeddings = data.get('embeddings')
1082
+ self._papers = data.get("papers", [])
1083
+ self._embeddings = data.get("embeddings")
1058
1084
  except Exception as e:
1059
1085
  logger.warning(f"Failed to load vector index: {e}")
1060
-
1086
+
1061
1087
  def _save_index(self) -> None:
1062
1088
  """Save vector index to disk."""
1063
1089
  self.index_path.parent.mkdir(parents=True, exist_ok=True)
1064
- data = {
1065
- 'papers': self._papers,
1066
- 'embeddings': self._embeddings
1067
- }
1068
- with open(self.index_path, 'wb') as f:
1090
+ data = {"papers": self._papers, "embeddings": self._embeddings}
1091
+ with open(self.index_path, "wb") as f:
1069
1092
  pickle.dump(data, f)
1070
1093
 
1071
1094
 
@@ -1073,27 +1096,38 @@ class UnifiedSearcher:
1073
1096
  """
1074
1097
  Unified searcher that combines results from multiple engines.
1075
1098
  """
1076
-
1077
- def __init__(self,
1078
- config=None,
1079
- email: Optional[str] = None,
1080
- semantic_scholar_api_key: Optional[str] = None,
1081
- crossref_api_key: Optional[str] = None,
1082
- google_scholar_timeout: int = 10):
1099
+
1100
+ def __init__(
1101
+ self,
1102
+ config=None,
1103
+ email: Optional[str] = None,
1104
+ semantic_scholar_api_key: Optional[str] = None,
1105
+ crossref_api_key: Optional[str] = None,
1106
+ google_scholar_timeout: int = 10,
1107
+ ):
1083
1108
  """Initialize unified searcher with all engines."""
1084
-
1109
+
1085
1110
  # Handle config parameter
1086
1111
  if config is not None:
1087
1112
  from scitex.scholar.config import ScholarConfig
1113
+
1088
1114
  if not isinstance(config, ScholarConfig):
1089
1115
  raise TypeError("config must be a ScholarConfig instance")
1090
1116
  self.config = config
1091
-
1117
+
1092
1118
  # Use config resolution for parameters
1093
- self.email = self.config.resolve("pubmed_email", email, "research@example.com")
1094
- self.semantic_scholar_api_key = self.config.resolve("semantic_scholar_api_key", semantic_scholar_api_key, None)
1095
- self.crossref_api_key = self.config.resolve("crossref_api_key", crossref_api_key, None)
1096
- self.google_scholar_timeout = google_scholar_timeout # No config key for this yet
1119
+ self.email = self.config.resolve(
1120
+ "pubmed_email", email, "research@example.com"
1121
+ )
1122
+ self.semantic_scholar_api_key = self.config.resolve(
1123
+ "semantic_scholar_api_key", semantic_scholar_api_key, None
1124
+ )
1125
+ self.crossref_api_key = self.config.resolve(
1126
+ "crossref_api_key", crossref_api_key, None
1127
+ )
1128
+ self.google_scholar_timeout = (
1129
+ google_scholar_timeout # No config key for this yet
1130
+ )
1097
1131
  else:
1098
1132
  # Fallback to direct parameters
1099
1133
  self.config = None
@@ -1101,65 +1135,81 @@ class UnifiedSearcher:
1101
1135
  self.semantic_scholar_api_key = semantic_scholar_api_key
1102
1136
  self.crossref_api_key = crossref_api_key
1103
1137
  self.google_scholar_timeout = google_scholar_timeout
1104
-
1138
+
1105
1139
  self._engines = {} # Lazy-loaded engines
1106
-
1140
+
1107
1141
  @property
1108
1142
  def engines(self):
1109
1143
  """Lazy-load engines as needed."""
1110
1144
  return self._engines
1111
-
1145
+
1112
1146
  def _get_engine(self, source: str):
1113
1147
  """Get or create engine for a source."""
1114
1148
  if source not in self._engines:
1115
- if source == 'semantic_scholar':
1116
- self._engines[source] = SemanticScholarEngine(self.semantic_scholar_api_key)
1117
- elif source == 'pubmed':
1149
+ if source == "semantic_scholar":
1150
+ self._engines[source] = SemanticScholarEngine(
1151
+ self.semantic_scholar_api_key
1152
+ )
1153
+ elif source == "pubmed":
1118
1154
  self._engines[source] = PubMedEngine(self.email)
1119
- elif source == 'arxiv':
1155
+ elif source == "arxiv":
1120
1156
  self._engines[source] = ArxivEngine()
1121
- elif source == 'google_scholar':
1122
- self._engines[source] = GoogleScholarEngine(timeout=self.google_scholar_timeout)
1123
- elif source == 'crossref':
1124
- self._engines[source] = CrossRefEngine(api_key=self.crossref_api_key, email=self.email)
1125
- elif source == 'local':
1157
+ elif source == "google_scholar":
1158
+ self._engines[source] = GoogleScholarEngine(
1159
+ timeout=self.google_scholar_timeout
1160
+ )
1161
+ elif source == "crossref":
1162
+ self._engines[source] = CrossRefEngine(
1163
+ api_key=self.crossref_api_key, email=self.email
1164
+ )
1165
+ elif source == "local":
1126
1166
  self._engines[source] = LocalSearchEngine()
1127
- elif source == 'vector':
1167
+ elif source == "vector":
1128
1168
  self._engines[source] = VectorSearchEngine()
1129
1169
  else:
1130
1170
  raise ValueError(f"Unknown source: {source}")
1131
1171
  return self._engines[source]
1132
-
1133
- async def search_async(self,
1134
- query: str,
1135
- sources: List[str] = None,
1136
- limit: int = 20,
1137
- deduplicate: bool = True,
1138
- **kwargs) -> List[Paper]:
1172
+
1173
+ async def search_async(
1174
+ self,
1175
+ query: str,
1176
+ sources: List[str] = None,
1177
+ limit: int = 20,
1178
+ deduplicate: bool = True,
1179
+ **kwargs,
1180
+ ) -> List[Paper]:
1139
1181
  """
1140
1182
  Search across multiple sources and merge results.
1141
-
1183
+
1142
1184
  Args:
1143
1185
  query: Search query
1144
1186
  sources: List of sources to search (default: all web sources)
1145
1187
  limit: Maximum results per source
1146
1188
  deduplicate: Remove duplicate papers
1147
1189
  **kwargs: Additional parameters for engines
1148
-
1190
+
1149
1191
  Returns:
1150
1192
  Merged and ranked list of papers
1151
1193
  """
1152
1194
  if sources is None:
1153
- sources = ['pubmed'] # Default to PubMed only
1154
-
1195
+ sources = ["pubmed"] # Default to PubMed only
1196
+
1155
1197
  # Filter to valid sources
1156
- valid_sources = ['pubmed', 'semantic_scholar', 'google_scholar', 'crossref', 'arxiv', 'local', 'vector']
1198
+ valid_sources = [
1199
+ "pubmed",
1200
+ "semantic_scholar",
1201
+ "google_scholar",
1202
+ "crossref",
1203
+ "arxiv",
1204
+ "local",
1205
+ "vector",
1206
+ ]
1157
1207
  sources = [s for s in sources if s in valid_sources]
1158
-
1208
+
1159
1209
  if not sources:
1160
1210
  logger.warning("No valid search sources specified")
1161
1211
  return []
1162
-
1212
+
1163
1213
  # Search all sources concurrently
1164
1214
  tasks = []
1165
1215
  for source in sources:
@@ -1169,10 +1219,10 @@ class UnifiedSearcher:
1169
1219
  tasks.append(task)
1170
1220
  except Exception as e:
1171
1221
  logger.debug(f"Failed to initialize {source} engine: {e}")
1172
-
1222
+
1173
1223
  logger.debug(f"Searching {len(tasks)} sources: {sources}")
1174
1224
  results = await asyncio.gather(*tasks, return_exceptions=True)
1175
-
1225
+
1176
1226
  # Merge results
1177
1227
  all_papers = []
1178
1228
  for source, result in zip(sources, results):
@@ -1181,26 +1231,26 @@ class UnifiedSearcher:
1181
1231
  else:
1182
1232
  logger.debug(f"{source} returned {len(result)} papers")
1183
1233
  all_papers.extend(result)
1184
-
1234
+
1185
1235
  # Deduplicate if requested
1186
1236
  if deduplicate:
1187
1237
  all_papers = self._deduplicate_papers(all_papers)
1188
-
1238
+
1189
1239
  # Sort by relevance (using citation count as proxy)
1190
1240
  all_papers.sort(key=lambda p: p.citation_count or 0, reverse=True)
1191
-
1241
+
1192
1242
  return all_papers[:limit]
1193
-
1243
+
1194
1244
  def _deduplicate_papers(self, papers: List[Paper]) -> List[Paper]:
1195
1245
  """Remove duplicate papers based on similarity."""
1196
1246
  if not papers:
1197
1247
  return []
1198
-
1248
+
1199
1249
  unique_papers = [papers[0]]
1200
-
1250
+
1201
1251
  for paper in papers[1:]:
1202
1252
  is_duplicate = False
1203
-
1253
+
1204
1254
  for unique_paper in unique_papers:
1205
1255
  if paper.similarity_score(unique_paper) > 0.85:
1206
1256
  is_duplicate = True
@@ -1209,86 +1259,95 @@ class UnifiedSearcher:
1209
1259
  unique_papers.remove(unique_paper)
1210
1260
  unique_papers.append(paper)
1211
1261
  break
1212
-
1262
+
1213
1263
  if not is_duplicate:
1214
1264
  unique_papers.append(paper)
1215
-
1265
+
1216
1266
  return unique_papers
1217
-
1267
+
1218
1268
  def build_local_index(self, pdf_dirs: List[Union[str, Path]]) -> Dict[str, Any]:
1219
1269
  """Build local search index."""
1220
1270
  pdf_dirs = [Path(d) for d in pdf_dirs]
1221
- return self.engines['local'].build_index(pdf_dirs)
1222
-
1271
+ return self.engines["local"].build_index(pdf_dirs)
1272
+
1223
1273
  def add_to_vector_index(self, papers: List[Paper]) -> None:
1224
1274
  """Add papers to vector search index."""
1225
- self.engines['vector'].add_papers(papers)
1275
+ self.engines["vector"].add_papers(papers)
1226
1276
 
1227
1277
 
1228
1278
  # Convenience functions - get_scholar_dir moved to utils._paths
1229
1279
 
1230
1280
 
1231
- async def search_async(query: str,
1232
- sources: List[str] = None,
1233
- limit: int = 20,
1234
- email: Optional[str] = None,
1235
- semantic_scholar_api_key: Optional[str] = None,
1236
- **kwargs) -> List[Paper]:
1281
+ async def search_async(
1282
+ query: str,
1283
+ sources: List[str] = None,
1284
+ limit: int = 20,
1285
+ email: Optional[str] = None,
1286
+ semantic_scholar_api_key: Optional[str] = None,
1287
+ **kwargs,
1288
+ ) -> List[Paper]:
1237
1289
  """
1238
1290
  Async convenience function for searching papers.
1239
1291
  """
1240
- searcher = UnifiedSearcher(email=email, semantic_scholar_api_key=semantic_scholar_api_key)
1292
+ searcher = UnifiedSearcher(
1293
+ email=email, semantic_scholar_api_key=semantic_scholar_api_key
1294
+ )
1241
1295
  return await searcher.search_async(query, sources, limit, **kwargs)
1242
1296
 
1243
1297
 
1244
- def search_sync(query: str,
1245
- sources: List[str] = None,
1246
- limit: int = 20,
1247
- email: Optional[str] = None,
1248
- semantic_scholar_api_key: Optional[str] = None,
1249
- **kwargs) -> List[Paper]:
1298
+ def search_sync(
1299
+ query: str,
1300
+ sources: List[str] = None,
1301
+ limit: int = 20,
1302
+ email: Optional[str] = None,
1303
+ semantic_scholar_api_key: Optional[str] = None,
1304
+ **kwargs,
1305
+ ) -> List[Paper]:
1250
1306
  """
1251
1307
  Synchronous convenience function for searching papers.
1252
1308
  """
1253
- return asyncio.run(search_async(query, sources, limit, email, semantic_scholar_api_key, **kwargs))
1309
+ return asyncio.run(
1310
+ search_async(query, sources, limit, email, semantic_scholar_api_key, **kwargs)
1311
+ )
1254
1312
 
1255
1313
 
1256
- def build_index(paths: List[Union[str, Path]],
1257
- vector_index: bool = True) -> Dict[str, Any]:
1314
+ def build_index(
1315
+ paths: List[Union[str, Path]], vector_index: bool = True
1316
+ ) -> Dict[str, Any]:
1258
1317
  """
1259
1318
  Build local search indices.
1260
-
1319
+
1261
1320
  Args:
1262
1321
  paths: Directories containing PDFs
1263
1322
  vector_index: Also build vector similarity index
1264
-
1323
+
1265
1324
  Returns:
1266
1325
  Statistics about indexing
1267
1326
  """
1268
1327
  searcher = UnifiedSearcher()
1269
1328
  stats = searcher.build_local_index(paths)
1270
-
1329
+
1271
1330
  if vector_index:
1272
1331
  # Add papers to vector index
1273
- papers = searcher.engines['local']._search_sync("*", 9999, {})
1332
+ papers = searcher.engines["local"]._search_sync("*", 9999, {})
1274
1333
  if papers:
1275
1334
  searcher.add_to_vector_index(papers)
1276
- stats['vector_indexed'] = len(papers)
1277
-
1335
+ stats["vector_indexed"] = len(papers)
1336
+
1278
1337
  return stats
1279
1338
 
1280
1339
 
1281
1340
  # Export all classes and functions
1282
1341
  __all__ = [
1283
- 'SearchEngine',
1284
- 'SemanticScholarEngine',
1285
- 'PubMedEngine',
1286
- 'ArxivEngine',
1287
- 'LocalSearchEngine',
1288
- 'VectorSearchEngine',
1289
- 'UnifiedSearcher',
1290
- 'get_scholar_dir',
1291
- 'search',
1292
- 'search_sync',
1293
- 'build_index'
1294
- ]
1342
+ "SearchEngine",
1343
+ "SemanticScholarEngine",
1344
+ "PubMedEngine",
1345
+ "ArxivEngine",
1346
+ "LocalSearchEngine",
1347
+ "VectorSearchEngine",
1348
+ "UnifiedSearcher",
1349
+ "get_scholar_dir",
1350
+ "search",
1351
+ "search_sync",
1352
+ "build_index",
1353
+ ]