scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1835 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-08 05:41:15 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/storage/_LibraryManager.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/storage/_LibraryManager.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ __FILE__ = __file__
15
+
16
+ import asyncio
17
+ import copy
18
+ import json
19
+ import re
20
+ from collections import OrderedDict
21
+ from datetime import datetime
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List, Optional
24
+
25
+ from scitex import logging
26
+ from scitex.scholar.config import ScholarConfig
27
+ from scitex.scholar.metadata_engines.utils import BASE_STRUCTURE, standardize_metadata
28
+ from scitex.scholar.storage._DeduplicationManager import DeduplicationManager
29
+ from scitex.scholar.utils import TextNormalizer
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ class LibraryManager:
35
+ """Unified manager for Scholar library structure and paper storage."""
36
+
37
+ def __init__(
38
+ self,
39
+ project: str = None,
40
+ single_doi_resolver=None,
41
+ config: Optional[ScholarConfig] = None,
42
+ ):
43
+ """Initialize library manager."""
44
+ self.config = config or ScholarConfig()
45
+ self.project = self.config.resolve("project", project)
46
+ self.library_master_dir = self.config.path_manager.get_library_master_dir()
47
+ self.single_doi_resolver = single_doi_resolver
48
+ self._source_filename = "papers"
49
+ self.dedup_manager = DeduplicationManager(config=self.config)
50
+
51
+ # =========================================================================
52
+ # Storage Helper Methods (Phase 1)
53
+ # =========================================================================
54
+
55
+ def has_metadata(self, paper_id: str) -> bool:
56
+ """Check if metadata.json exists for paper.
57
+
58
+ Args:
59
+ paper_id: 8-digit paper ID
60
+
61
+ Returns:
62
+ True if metadata.json exists, False otherwise
63
+ """
64
+ metadata_file = self.library_master_dir / paper_id / "metadata.json"
65
+ return metadata_file.exists()
66
+
67
+ def has_urls(self, paper_id: str) -> bool:
68
+ """Check if PDF URLs exist in metadata.
69
+
70
+ Args:
71
+ paper_id: 8-digit paper ID
72
+
73
+ Returns:
74
+ True if metadata has PDF URLs, False otherwise
75
+ """
76
+ if not self.has_metadata(paper_id):
77
+ return False
78
+
79
+ metadata_file = self.library_master_dir / paper_id / "metadata.json"
80
+ try:
81
+ with open(metadata_file, 'r') as f:
82
+ data = json.load(f)
83
+
84
+ # Check nested structure: metadata.url.pdfs
85
+ urls = data.get("metadata", {}).get("url", {}).get("pdfs", [])
86
+ return len(urls) > 0
87
+ except Exception:
88
+ return False
89
+
90
+ def has_pdf(self, paper_id: str) -> bool:
91
+ """Check if PDF file exists in storage.
92
+
93
+ Args:
94
+ paper_id: 8-digit paper ID
95
+
96
+ Returns:
97
+ True if any PDF file exists, False otherwise
98
+ """
99
+ paper_dir = self.library_master_dir / paper_id
100
+ if not paper_dir.exists():
101
+ return False
102
+
103
+ # Check for any PDF files
104
+ pdf_files = list(paper_dir.glob("*.pdf"))
105
+ return len(pdf_files) > 0
106
+
107
+ def load_paper_from_id(self, paper_id: str) -> Optional["Paper"]:
108
+ """Load Paper object from storage by ID.
109
+
110
+ Args:
111
+ paper_id: 8-digit paper ID
112
+
113
+ Returns:
114
+ Paper object if found, None otherwise
115
+ """
116
+ from scitex.scholar.core.Paper import Paper
117
+
118
+ metadata_file = self.library_master_dir / paper_id / "metadata.json"
119
+
120
+ if not metadata_file.exists():
121
+ return None
122
+
123
+ try:
124
+ with open(metadata_file, 'r') as f:
125
+ data = json.load(f)
126
+
127
+ # Use Paper.from_dict() which handles Pydantic validation
128
+ paper = Paper.from_dict(data)
129
+ return paper
130
+
131
+ except Exception as e:
132
+ logger.error(f"Failed to load paper {paper_id}: {e}")
133
+ return None
134
+
135
+ def save_paper_incremental(self, paper_id: str, paper: "Paper") -> None:
136
+ """Save Paper object to storage (incremental update).
137
+
138
+ This saves the complete Paper object to metadata.json,
139
+ preserving existing data and updating with new fields.
140
+
141
+ Args:
142
+ paper_id: 8-digit paper ID
143
+ paper: Paper object to save
144
+ """
145
+ storage_path = self.library_master_dir / paper_id
146
+ storage_path.mkdir(parents=True, exist_ok=True)
147
+
148
+ metadata_file = storage_path / "metadata.json"
149
+
150
+ # Load existing metadata if it exists
151
+ existing_data = {}
152
+ if metadata_file.exists():
153
+ try:
154
+ with open(metadata_file, 'r') as f:
155
+ existing_data = json.load(f)
156
+ except Exception:
157
+ pass
158
+
159
+ # Get new data from Paper object
160
+ new_data = paper.model_dump()
161
+
162
+ # Merge: new data takes precedence for non-None values
163
+ merged_data = self._merge_metadata(existing_data, new_data)
164
+
165
+ # Update timestamps
166
+ if "container" not in merged_data:
167
+ merged_data["container"] = {}
168
+ merged_data["container"]["updated_at"] = datetime.now().isoformat()
169
+
170
+ # Save to file
171
+ with open(metadata_file, 'w') as f:
172
+ json.dump(merged_data, f, indent=2, ensure_ascii=False)
173
+
174
+ logger.debug(f"Saved paper {paper_id} to storage")
175
+
176
+ def _merge_metadata(self, existing: Dict, new: Dict) -> Dict:
177
+ """Recursively merge metadata dicts, preferring new non-None values."""
178
+ result = existing.copy()
179
+
180
+ for key, new_value in new.items():
181
+ if key not in result:
182
+ result[key] = new_value
183
+ elif new_value is None:
184
+ # Keep existing value if new is None
185
+ pass
186
+ elif isinstance(new_value, dict) and isinstance(result[key], dict):
187
+ # Recursively merge nested dicts
188
+ result[key] = self._merge_metadata(result[key], new_value)
189
+ elif isinstance(new_value, list) and len(new_value) > 0:
190
+ # Update lists if new list is not empty
191
+ result[key] = new_value
192
+ elif new_value:
193
+ # Update with new non-empty value
194
+ result[key] = new_value
195
+
196
+ return result
197
+
198
+ # =========================================================================
199
+ # Existing Methods
200
+ # =========================================================================
201
+
202
+ def _dotdict_to_dict(self, obj):
203
+ """Recursively convert DotDict to plain dict for JSON serialization."""
204
+ from scitex.dict import DotDict
205
+
206
+ if isinstance(obj, DotDict):
207
+ return {k: self._dotdict_to_dict(v) for k, v in obj._data.items()}
208
+ elif isinstance(obj, dict):
209
+ return {k: self._dotdict_to_dict(v) for k, v in obj.items()}
210
+ elif isinstance(obj, list):
211
+ return [self._dotdict_to_dict(item) for item in obj]
212
+ else:
213
+ return obj
214
+
215
+ def _add_engine_to_list(self, engines_list: list, source: str) -> None:
216
+ """Helper to add source to engines list if not already present."""
217
+ if source and source not in engines_list:
218
+ engines_list.append(source)
219
+
220
    def _convert_to_standardized_metadata(
        self, flat_metadata: Dict
    ) -> OrderedDict:
        """Convert flat metadata dict to standardized nested structure with _engines tracking.

        Starts from a deep copy of BASE_STRUCTURE and copies each recognized
        flat key into its nested section (id / basic / citation_count /
        publication / url / path), recording the providing engine in the
        matching ``*_engines`` list where a ``*_source`` key is present.
        Flat keys not listed below are silently dropped.

        Args:
            flat_metadata: Flat key/value metadata (e.g. "doi", "title",
                plus optional "<field>_source" provenance keys).

        Returns:
            OrderedDict following BASE_STRUCTURE's nested layout.
        """
        standardized = copy.deepcopy(BASE_STRUCTURE)

        # Map flat fields to standardized structure
        # ID section
        if "doi" in flat_metadata:
            standardized["id"]["doi"] = flat_metadata["doi"]
            self._add_engine_to_list(
                standardized["id"]["doi_engines"],
                flat_metadata.get("doi_source"),
            )
        if "scitex_id" in flat_metadata:
            # Legacy name: flat "scitex_id" maps onto the "scholar_id" slot.
            standardized["id"]["scholar_id"] = flat_metadata["scitex_id"]

        # Basic section
        if "title" in flat_metadata:
            standardized["basic"]["title"] = flat_metadata["title"]
            self._add_engine_to_list(
                standardized["basic"]["title_engines"],
                flat_metadata.get("title_source"),
            )
        if "authors" in flat_metadata:
            standardized["basic"]["authors"] = flat_metadata["authors"]
            self._add_engine_to_list(
                standardized["basic"]["authors_engines"],
                flat_metadata.get("authors_source"),
            )
        if "year" in flat_metadata:
            standardized["basic"]["year"] = flat_metadata["year"]
            self._add_engine_to_list(
                standardized["basic"]["year_engines"],
                flat_metadata.get("year_source"),
            )
        if "abstract" in flat_metadata:
            standardized["basic"]["abstract"] = flat_metadata["abstract"]
            self._add_engine_to_list(
                standardized["basic"]["abstract_engines"],
                flat_metadata.get("abstract_source"),
            )

        # Citation count section
        if "citation_count" in flat_metadata:
            cc_value = flat_metadata["citation_count"]
            # Handle both scalar (4) and dict ({"total": 4}) formats
            if isinstance(cc_value, dict):
                # If it's a dict, extract the total value
                standardized["citation_count"]["total"] = cc_value.get("total")
                self._add_engine_to_list(
                    standardized["citation_count"]["total_engines"],
                    cc_value.get("total_source"),
                )
                # Copy yearly breakdowns if present
                # NOTE(review): year keys are hard-coded 2015-2025; other
                # years present in cc_value are ignored — confirm intended.
                for year in [
                    "2025",
                    "2024",
                    "2023",
                    "2022",
                    "2021",
                    "2020",
                    "2019",
                    "2018",
                    "2017",
                    "2016",
                    "2015",
                ]:
                    if year in cc_value:
                        standardized["citation_count"][year] = cc_value[year]
                        if f"{year}_source" in cc_value:
                            self._add_engine_to_list(
                                standardized["citation_count"][
                                    f"{year}_engines"
                                ],
                                cc_value.get(f"{year}_source"),
                            )
            else:
                # If it's a scalar, just assign it to total
                standardized["citation_count"]["total"] = cc_value
                self._add_engine_to_list(
                    standardized["citation_count"]["total_engines"],
                    flat_metadata.get("citation_count_source"),
                )

        # Publication section
        if "journal" in flat_metadata:
            standardized["publication"]["journal"] = flat_metadata["journal"]
            self._add_engine_to_list(
                standardized["publication"]["journal_engines"],
                flat_metadata.get("journal_source"),
            )
        if "short_journal" in flat_metadata:
            standardized["publication"]["short_journal"] = flat_metadata[
                "short_journal"
            ]
        if "impact_factor" in flat_metadata:
            standardized["publication"]["impact_factor"] = flat_metadata[
                "impact_factor"
            ]
        if "issn" in flat_metadata:
            standardized["publication"]["issn"] = flat_metadata["issn"]
        if "volume" in flat_metadata:
            standardized["publication"]["volume"] = flat_metadata["volume"]
        if "issue" in flat_metadata:
            standardized["publication"]["issue"] = flat_metadata["issue"]
        if "pages" in flat_metadata:
            # Split pages into first_page and last_page if needed
            # NOTE(review): single-page values without "-" are dropped
            # entirely — confirm this is intended.
            pages = flat_metadata["pages"]
            if pages and "-" in str(pages):
                first, last = str(pages).split("-", 1)
                standardized["publication"]["first_page"] = first.strip()
                standardized["publication"]["last_page"] = last.strip()
        if "publisher" in flat_metadata:
            standardized["publication"]["publisher"] = flat_metadata[
                "publisher"
            ]

        # URL section — URL-related fields are attributed to ScholarURLFinder.
        if "url_doi" in flat_metadata:
            standardized["url"]["doi"] = flat_metadata["url_doi"]
        if "url_publisher" in flat_metadata:
            standardized["url"]["publisher"] = flat_metadata["url_publisher"]
            self._add_engine_to_list(
                standardized["url"]["publisher_engines"], "ScholarURLFinder"
            )
        if "url_openurl_query" in flat_metadata:
            standardized["url"]["openurl_query"] = flat_metadata[
                "url_openurl_query"
            ]
        if "url_openurl_resolved" in flat_metadata:
            standardized["url"]["openurl_resolved"] = flat_metadata[
                "url_openurl_resolved"
            ]
            self._add_engine_to_list(
                standardized["url"]["openurl_resolved_engines"],
                "ScholarURLFinder",
            )
        if "urls_pdf" in flat_metadata:
            standardized["url"]["pdfs"] = flat_metadata["urls_pdf"]
            self._add_engine_to_list(
                standardized["url"]["pdfs_engines"], "ScholarURLFinder"
            )

        # Path section — a single flat pdf_path becomes a one-element list.
        if "pdf_path" in flat_metadata:
            standardized["path"]["pdfs"] = [flat_metadata["pdf_path"]]
            self._add_engine_to_list(
                standardized["path"]["pdfs_engines"],
                "ScholarPDFDownloaderWithScreenshotsParallel",
            )

        return standardized
373
+
374
+ def _call_path_manager_get_storage_paths(
375
+ self, paper_info: Dict, collection_name: str = "MASTER"
376
+ ) -> Dict[str, Any]:
377
+ """Helper to call PathManager's get_paper_storage_paths with proper parameters."""
378
+ # Extract parameters from paper_info dict
379
+ doi = paper_info.get("doi")
380
+ title = paper_info.get("title")
381
+ authors = paper_info.get("authors", [])
382
+ year = paper_info.get("year")
383
+ journal = paper_info.get("journal")
384
+
385
+ # Call PathManager with individual parameters
386
+ storage_path, readable_name, paper_id = (
387
+ self.config.path_manager.get_paper_storage_paths(
388
+ doi=doi,
389
+ title=title,
390
+ authors=authors,
391
+ year=year,
392
+ journal=journal,
393
+ project=collection_name,
394
+ )
395
+ )
396
+
397
+ # Return in the expected dict format
398
+ return {
399
+ "storage_path": storage_path,
400
+ "readable_name": readable_name,
401
+ "unique_id": paper_id,
402
+ }
403
+
404
+ def check_library_for_doi(
405
+ self, title: str, year: Optional[int] = None
406
+ ) -> Optional[str]:
407
+ """Check if DOI already exists in master Scholar library."""
408
+
409
+ try:
410
+ for paper_dir in self.library_master_dir.iterdir():
411
+ if not paper_dir.is_dir():
412
+ continue
413
+
414
+ metadata_file = paper_dir / "metadata.json"
415
+ if metadata_file.exists():
416
+ try:
417
+ with open(metadata_file, "r") as file_:
418
+ metadata = json.load(file_)
419
+
420
+ stored_title = metadata.get("title", "")
421
+ stored_year = metadata.get("year")
422
+ stored_doi = metadata.get("doi")
423
+
424
+ title_match = self._is_title_similar(
425
+ title, stored_title
426
+ )
427
+ year_match = (
428
+ not year
429
+ or not stored_year
430
+ or abs(int(stored_year) - int(year)) <= 1
431
+ if isinstance(stored_year, (int, str))
432
+ and str(stored_year).isdigit()
433
+ else stored_year == year
434
+ )
435
+
436
+ if title_match and year_match and stored_doi:
437
+ logger.info(
438
+ f"DOI found in master Scholar library: {stored_doi} (paper_id: {paper_dir.name})"
439
+ )
440
+ return stored_doi
441
+
442
+ except (
443
+ json.JSONDecodeError,
444
+ KeyError,
445
+ ValueError,
446
+ ) as exc_:
447
+ logger.debug(
448
+ f"Error reading metadata from {metadata_file}: {exc_}"
449
+ )
450
+ continue
451
+
452
+ return None
453
+
454
+ except Exception as exc_:
455
+ logger.debug(f"Error checking master Scholar library: {exc_}")
456
+ return None
457
+
458
+ def save_resolved_paper(
459
+ self,
460
+ # Can accept either a Paper object or individual fields
461
+ paper_data: Optional["Paper"] = None,
462
+ # Required bibliographic fields (if not providing paper_data)
463
+ title: Optional[str] = None,
464
+ doi: Optional[str] = None,
465
+ # Optional bibliographic fields
466
+ authors: Optional[List[str]] = None,
467
+ year: Optional[int] = None,
468
+ journal: Optional[str] = None,
469
+ abstract: Optional[str] = None,
470
+ # Additional bibliographic fields
471
+ volume: Optional[str] = None,
472
+ issue: Optional[str] = None,
473
+ pages: Optional[str] = None,
474
+ publisher: Optional[str] = None,
475
+ issn: Optional[str] = None,
476
+ short_journal: Optional[str] = None,
477
+ # Enrichment fields
478
+ citation_count: Optional[int] = None,
479
+ impact_factor: Optional[float] = None,
480
+ # Source tracking (which engine/database provided this info)
481
+ doi_source: Optional[str] = None,
482
+ title_source: Optional[str] = None,
483
+ abstract_source: Optional[str] = None,
484
+ authors_source: Optional[str] = None,
485
+ year_source: Optional[str] = None,
486
+ journal_source: Optional[str] = None,
487
+ # Library management
488
+ library_id: Optional[str] = None,
489
+ project: Optional[str] = None,
490
+ # Legacy support (will be removed)
491
+ metadata: Optional[Dict] = None,
492
+ bibtex_source: Optional[str] = None,
493
+ source: Optional[str] = None, # Legacy doi_source
494
+ paper_id: Optional[str] = None, # Legacy library_id
495
+ **kwargs, # For backward compatibility
496
+ ) -> str:
497
+ """Save successfully resolved paper to Scholar library."""
498
+
499
+ # If paper_data is provided, extract fields from it
500
+ if paper_data is not None:
501
+ if hasattr(paper_data, "metadata"):
502
+ # Pydantic Paper object
503
+ title = title or (paper_data.metadata.basic.title or "")
504
+ doi = doi or (paper_data.metadata.id.doi or "")
505
+ authors = authors or paper_data.metadata.basic.authors
506
+ year = year or paper_data.metadata.basic.year
507
+ journal = journal or paper_data.metadata.publication.journal
508
+ abstract = abstract or paper_data.metadata.basic.abstract
509
+ publisher = (
510
+ publisher or paper_data.metadata.publication.publisher
511
+ )
512
+ impact_factor = (
513
+ impact_factor
514
+ or paper_data.metadata.publication.impact_factor
515
+ )
516
+ library_id = library_id or paper_data.container.library_id
517
+ elif isinstance(paper_data, dict):
518
+ # Dict paper object
519
+ title = title or paper_data.get("title", "")
520
+ doi = doi or paper_data.get("doi", "")
521
+ authors = authors or paper_data.get("authors", [])
522
+ year = year or paper_data.get("year")
523
+ journal = journal or paper_data.get("journal")
524
+ abstract = abstract or paper_data.get("abstract")
525
+ publisher = publisher or paper_data.get("publisher")
526
+ impact_factor = impact_factor or paper_data.get(
527
+ "impact_factor"
528
+ )
529
+ library_id = (
530
+ library_id
531
+ or paper_data.get("scitex_id")
532
+ or paper_data.get("scholar_id")
533
+ )
534
+
535
+ # Handle legacy parameters
536
+ if paper_id and not library_id:
537
+ library_id = paper_id
538
+ if source and not doi_source:
539
+ doi_source = source
540
+
541
+ # Build paper_info with explicit parameters (not metadata dict)
542
+ paper_info = {
543
+ "title": title,
544
+ "year": year,
545
+ "authors": authors or [],
546
+ "doi": doi,
547
+ "journal": journal,
548
+ }
549
+
550
+ # Only use metadata dict as fallback for backward compatibility
551
+ if metadata:
552
+ if not journal:
553
+ journal = metadata.get("journal")
554
+ paper_info["journal"] = journal
555
+ if not year:
556
+ year = metadata.get("year")
557
+ paper_info["year"] = year
558
+ if not authors:
559
+ authors = metadata.get("authors")
560
+ paper_info["authors"] = authors or []
561
+
562
+ # Check for existing paper first (deduplication)
563
+ check_metadata = {
564
+ "doi": doi,
565
+ "title": title,
566
+ "authors": authors or [],
567
+ "year": year,
568
+ }
569
+ existing_paper_dir = self.dedup_manager.check_for_existing_paper(
570
+ check_metadata
571
+ )
572
+
573
+ if existing_paper_dir:
574
+ logger.info(f"Found existing paper: {existing_paper_dir.name}")
575
+ # Update existing paper instead of creating new
576
+ master_storage_path = existing_paper_dir
577
+ paper_id = existing_paper_dir.name
578
+ readable_name = None # Will be determined from existing symlinks
579
+ else:
580
+ # Call PathManager with individual parameters for new paper
581
+ storage_path, readable_name, paper_id = (
582
+ self.config.path_manager.get_paper_storage_paths(
583
+ doi=doi,
584
+ title=title,
585
+ authors=authors or [],
586
+ year=year,
587
+ journal=journal,
588
+ project="MASTER",
589
+ )
590
+ )
591
+ master_storage_path = storage_path
592
+
593
+ # Use provided library_id if available, otherwise use generated paper_id
594
+ if library_id:
595
+ paper_id = library_id
596
+
597
+ master_metadata_file = master_storage_path / "metadata.json"
598
+
599
+ existing_metadata = {}
600
+ if master_metadata_file.exists():
601
+ try:
602
+ with open(master_metadata_file, "r") as file_:
603
+ existing_metadata = json.load(file_)
604
+ except (json.JSONDecodeError, IOError):
605
+ existing_metadata = {}
606
+
607
+ # Clean text fields
608
+ clean_title = TextNormalizer.clean_metadata_text(
609
+ existing_metadata.get("title", title)
610
+ )
611
+
612
+ # Use explicit abstract parameter first, then metadata dict, then existing
613
+ clean_abstract = None
614
+ if abstract:
615
+ clean_abstract = TextNormalizer.clean_metadata_text(abstract)
616
+ elif metadata and metadata.get("abstract"):
617
+ clean_abstract = TextNormalizer.clean_metadata_text(
618
+ metadata["abstract"]
619
+ )
620
+ elif existing_metadata.get("abstract"):
621
+ clean_abstract = TextNormalizer.clean_metadata_text(
622
+ existing_metadata["abstract"]
623
+ )
624
+
625
+ # Handle doi_source - explicit parameter takes precedence
626
+ doi_source_value = doi_source or existing_metadata.get("doi_source")
627
+ if not doi_source_value and source:
628
+ # Normalize legacy source parameter
629
+ if "crossref" in source.lower():
630
+ doi_source_value = "crossref"
631
+ elif "semantic" in source.lower():
632
+ doi_source_value = "semantic_scholar"
633
+ elif "pubmed" in source.lower():
634
+ doi_source_value = "pubmed"
635
+ elif "openalex" in source.lower():
636
+ doi_source_value = "openalex"
637
+ else:
638
+ doi_source_value = source
639
+
640
+ comprehensive_metadata = {
641
+ # Core bibliographic fields
642
+ "title": clean_title,
643
+ "title_source": title_source
644
+ or existing_metadata.get("title_source", "input"),
645
+ "doi": existing_metadata.get("doi", doi),
646
+ "doi_source": doi_source_value,
647
+ "year": existing_metadata.get("year", year),
648
+ "year_source": year_source
649
+ or existing_metadata.get("year_source", "input" if year else None),
650
+ "authors": existing_metadata.get("authors", authors or []),
651
+ "authors_source": authors_source
652
+ or existing_metadata.get(
653
+ "authors_source", "input" if authors else None
654
+ ),
655
+ "journal": existing_metadata.get("journal", journal),
656
+ "journal_source": journal_source
657
+ or existing_metadata.get(
658
+ "journal_source", "input" if journal else None
659
+ ),
660
+ # Additional bibliographic fields from explicit parameters
661
+ "volume": existing_metadata.get("volume", volume),
662
+ "issue": existing_metadata.get("issue", issue),
663
+ "pages": existing_metadata.get("pages", pages),
664
+ "publisher": existing_metadata.get("publisher", publisher),
665
+ "issn": existing_metadata.get("issn", issn),
666
+ "short_journal": existing_metadata.get(
667
+ "short_journal", short_journal
668
+ ),
669
+ # Abstract with source tracking
670
+ "abstract": existing_metadata.get("abstract", clean_abstract),
671
+ "abstract_source": abstract_source
672
+ or existing_metadata.get(
673
+ "abstract_source", "input" if abstract else None
674
+ ),
675
+ # Enrichment fields
676
+ "citation_count": existing_metadata.get(
677
+ "citation_count", citation_count
678
+ ),
679
+ "impact_factor": existing_metadata.get(
680
+ "impact_factor", impact_factor
681
+ ),
682
+ "scitex_id": existing_metadata.get(
683
+ "scitex_id", existing_metadata.get("scholar_id", paper_id)
684
+ ),
685
+ "created_at": existing_metadata.get(
686
+ "created_at", datetime.now().isoformat()
687
+ ),
688
+ "created_by": existing_metadata.get(
689
+ "created_by", "SciTeX Scholar"
690
+ ),
691
+ "updated_at": datetime.now().isoformat(),
692
+ "projects": existing_metadata.get(
693
+ "projects", [] if self.project == "master" else [self.project]
694
+ ),
695
+ "master_storage_path": str(master_storage_path),
696
+ "readable_name": readable_name,
697
+ "metadata_file": str(master_metadata_file),
698
+ }
699
+
700
+ # Store plain dict version for JSON serialization
701
+ comprehensive_metadata_plain = self._dotdict_to_dict(
702
+ comprehensive_metadata
703
+ )
704
+
705
+ # Convert to standardized format before saving
706
+ standardized_metadata = self._convert_to_standardized_metadata(
707
+ comprehensive_metadata_plain
708
+ )
709
+
710
+ # Wrap with Paper container properties
711
+ final_structure = OrderedDict(
712
+ [
713
+ ("metadata", standardized_metadata),
714
+ (
715
+ "container",
716
+ OrderedDict(
717
+ [
718
+ (
719
+ "scitex_id",
720
+ comprehensive_metadata_plain.get("scitex_id"),
721
+ ),
722
+ ("library_id", paper_id),
723
+ (
724
+ "created_at",
725
+ comprehensive_metadata_plain.get("created_at"),
726
+ ),
727
+ (
728
+ "created_by",
729
+ comprehensive_metadata_plain.get("created_by"),
730
+ ),
731
+ (
732
+ "updated_at",
733
+ comprehensive_metadata_plain.get("updated_at"),
734
+ ),
735
+ (
736
+ "projects",
737
+ comprehensive_metadata_plain.get(
738
+ "projects", []
739
+ ),
740
+ ),
741
+ ("master_storage_path", str(master_storage_path)),
742
+ ("readable_name", readable_name),
743
+ ("metadata_file", str(master_metadata_file)),
744
+ (
745
+ "pdf_downloaded_at",
746
+ comprehensive_metadata_plain.get(
747
+ "pdf_downloaded_at"
748
+ ),
749
+ ),
750
+ (
751
+ "pdf_size_bytes",
752
+ comprehensive_metadata_plain.get(
753
+ "pdf_size_bytes"
754
+ ),
755
+ ),
756
+ ]
757
+ ),
758
+ ),
759
+ ]
760
+ )
761
+
762
+ with open(master_metadata_file, "w") as file_:
763
+ json.dump(final_structure, file_, indent=2, ensure_ascii=False)
764
+
765
+ logger.success(f"Saved paper to MASTER Scholar library: {paper_id}")
766
+
767
+ # Create project symlink if project is specified and not MASTER
768
+ if self.project and self.project not in ["master", "MASTER"]:
769
+ try:
770
+ # Use centralized naming logic - use original comprehensive_metadata (not plain)
771
+ readable_name = self._generate_readable_name(
772
+ comprehensive_metadata=comprehensive_metadata,
773
+ master_storage_path=master_storage_path,
774
+ authors=authors,
775
+ year=year,
776
+ journal=journal,
777
+ )
778
+
779
+ self._create_project_symlink(
780
+ master_storage_path=master_storage_path,
781
+ project=self.project,
782
+ readable_name=readable_name,
783
+ )
784
+ except Exception as exc_:
785
+ logger.error(
786
+ f"Failed to create symlink for {paper_id}: {exc_}"
787
+ )
788
+
789
+ return paper_id
790
+
791
+ def save_unresolved_paper(
792
+ self,
793
+ title: str,
794
+ year: Optional[int] = None,
795
+ authors: Optional[List[str]] = None,
796
+ reason: str = "DOI not found",
797
+ bibtex_source: Optional[str] = None,
798
+ ) -> None:
799
+ """Save paper that couldn't be resolved to unresolved directory."""
800
+ clean_title = (
801
+ TextNormalizer.clean_metadata_text(title) if title else ""
802
+ )
803
+ unresolved_info = {
804
+ "title": clean_title,
805
+ "year": year,
806
+ "authors": authors or [],
807
+ "reason": reason,
808
+ "bibtex_source": bibtex_source,
809
+ "project": self.project,
810
+ "created_at": datetime.now().isoformat(),
811
+ "created_by": "SciTeX Scholar",
812
+ }
813
+
814
+ project_lib_path = (
815
+ self.config.path_manager.get_scholar_library_path() / self.project
816
+ )
817
+ unresolved_dir = project_lib_path / "unresolved"
818
+ unresolved_dir.mkdir(parents=True, exist_ok=True)
819
+
820
+ safe_title = title or "untitled"
821
+ safe_title = re.sub(r"[^\w\s-]", "", safe_title)[:50]
822
+ safe_title = re.sub(r"[-\s]+", "_", safe_title)
823
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
824
+ unresolved_file = unresolved_dir / f"{safe_title}_{timestamp}.json"
825
+
826
+ with open(unresolved_file, "w") as file_:
827
+ json.dump(unresolved_info, file_, indent=2, ensure_ascii=False)
828
+
829
+ logger.warning(f"Saved unresolved entry: {unresolved_file.name}")
830
+
831
    async def resolve_and_create_library_structure_async(
        self,
        papers: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
    ) -> Dict[str, Dict[str, str]]:
        """Resolve DOIs and create full Scholar library structure with proper paths.

        For each paper: resolve a DOI, merge resolver metadata over the input,
        write ``metadata.json`` into MASTER storage, create a project symlink,
        and append a BibTeX info entry. Failures for one paper are logged and
        do not stop the batch.

        Args:
            papers: Paper dicts; entries without a "title" key are skipped.
            project: Project name receiving symlinks and BibTeX info.
            sources: Optional list of DOI-resolution sources to try.

        Returns:
            Mapping of paper title -> dict of created-entry info (ids, paths).

        Raises:
            ValueError: If no single_doi_resolver is configured.
        """
        if not self.single_doi_resolver:
            raise ValueError(
                "SingleDOIResolver is required for resolving DOIs"
            )

        results = {}
        for paper in papers:
            title = paper.get("title")
            if not title:
                # A title is the lookup key for DOI resolution and for results.
                logger.warning(f"Skipping paper without title: {paper}")
                continue

            logger.info(f"Processing: {title[:50]}...")

            try:
                doi_result = await self.single_doi_resolver.metadata2doi_async(
                    title=title,
                    year=paper.get("year"),
                    authors=paper.get("authors"),
                    sources=sources,
                )

                # Resolver metadata overrides the input paper dict where present.
                enhanced_metadata = self._extract_enhanced_metadata(
                    doi_result, paper
                )
                paper_info = {**paper, **enhanced_metadata}

                # MASTER storage location and canonical id for this paper.
                storage_paths = self._call_path_manager_get_storage_paths(
                    paper_info=paper_info, collection_name="MASTER"
                )
                paper_id = storage_paths["unique_id"]
                storage_path = storage_paths["storage_path"]
                metadata_file = storage_path / "metadata.json"

                complete_metadata = self._create_complete_metadata(
                    paper, doi_result, paper_id, enhanced_metadata
                )

                with open(metadata_file, "w") as file_:
                    json.dump(complete_metadata, file_, indent=2)

                logger.success(
                    f"Saved metadata.json for {paper_id} ({len(complete_metadata)} fields)"
                )

                project_symlink_path = self._create_project_symlink(
                    master_storage_path=storage_path,
                    project=project,
                    readable_name=storage_paths["readable_name"],
                )

                # _source_filename is stashed by the *_with_source_async variant;
                # default to "papers" when called directly.
                bibtex_source_filename = getattr(
                    self, "_source_filename", "papers"
                )
                info_dir = self._create_bibtex_info_structure(
                    project=project,
                    paper_info={**paper, **enhanced_metadata},
                    complete_metadata=complete_metadata,
                    bibtex_source_filename=bibtex_source_filename,
                )

                results[title] = {
                    "scitex_id": paper_id,
                    "scholar_id": paper_id,
                    "doi": complete_metadata.get("doi"),
                    "master_storage_path": str(storage_path),
                    "project_symlink_path": (
                        str(project_symlink_path)
                        if project_symlink_path
                        else None
                    ),
                    "readable_name": storage_paths["readable_name"],
                    "metadata_file": str(metadata_file),
                    "info_dir": str(info_dir) if info_dir else None,
                }

                logger.info(f"Created library entry: {paper_id}")
                if complete_metadata.get("doi"):
                    logger.info(f" DOI: {complete_metadata['doi']}")
                logger.info(f" Storage: {storage_path}")

            except Exception as exc_:
                # Best-effort batch: log and continue with the next paper.
                logger.error(f"❌ Error processing '{title[:30]}...': {exc_}")

        logger.success(
            f"Created Scholar library entries for {len(results)}/{len(papers)} papers"
        )
        return results
926
+
927
    async def resolve_and_create_library_structure_with_source_async(
        self,
        papers: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
        bibtex_source_filename: str = "papers",
    ) -> Dict[str, Dict[str, str]]:
        """Enhanced version that passes source filename for BibTeX structure.

        Args:
            papers: Paper dicts (each needs at least a "title").
            project: Project name receiving symlinks and BibTeX info.
            sources: Optional DOI-resolution sources to try.
            bibtex_source_filename: Base name used for the info/<name>_bib
                output directory and .bib file.

        Returns:
            Mapping of paper title -> created library-entry info.
        """
        # Stash on the instance; the delegate reads it back via getattr().
        self._source_filename = bibtex_source_filename
        return await self.resolve_and_create_library_structure_async(
            papers=papers, project=project, sources=sources
        )
939
+
940
+ def _extract_enhanced_metadata(
941
+ self, doi_result: Optional[Dict], paper: Dict
942
+ ) -> Dict[str, Any]:
943
+ """Extract enhanced metadata from DOI resolution result."""
944
+ enhanced = {}
945
+ if doi_result and isinstance(doi_result, dict):
946
+ metadata_source = doi_result.get("metadata", {})
947
+ enhanced.update(
948
+ {
949
+ "doi": doi_result.get("doi"),
950
+ "journal": metadata_source.get("journal")
951
+ or doi_result.get("journal")
952
+ or paper.get("journal"),
953
+ "authors": metadata_source.get("authors")
954
+ or doi_result.get("authors")
955
+ or paper.get("authors"),
956
+ "year": metadata_source.get("year")
957
+ or doi_result.get("year")
958
+ or paper.get("year"),
959
+ "title": metadata_source.get("title")
960
+ or doi_result.get("title")
961
+ or paper.get("title"),
962
+ "abstract": metadata_source.get("abstract")
963
+ or doi_result.get("abstract"),
964
+ "publisher": metadata_source.get("publisher")
965
+ or doi_result.get("publisher"),
966
+ "volume": metadata_source.get("volume")
967
+ or doi_result.get("volume"),
968
+ "issue": metadata_source.get("issue")
969
+ or doi_result.get("issue"),
970
+ "pages": metadata_source.get("pages")
971
+ or doi_result.get("pages"),
972
+ "issn": metadata_source.get("issn")
973
+ or doi_result.get("issn"),
974
+ "short_journal": metadata_source.get("short_journal")
975
+ or doi_result.get("short_journal"),
976
+ }
977
+ )
978
+
979
+ if doi_result.get("doi"):
980
+ logger.success(
981
+ f"Enhanced metadata from DOI source: {dict(metadata_source)}"
982
+ )
983
+
984
+ return enhanced
985
+
986
    def _create_complete_metadata(
        self,
        paper: Dict,
        doi_result: Optional[Dict],
        paper_id: str,
        enhanced_metadata: Dict,
    ) -> Dict[str, Any]:
        """Create complete metadata dictionary with source tracking.

        For each bibliographic field, records both the value and a
        ``<field>_source`` tag: the normalized resolver name when the value
        came from DOI resolution (i.e. differs from the input paper), or
        "manual" when it came from the caller.

        Args:
            paper: Original input paper dict.
            doi_result: DOI-resolver result (may be None on failure).
            paper_id: Canonical library id for this paper.
            enhanced_metadata: Output of _extract_enhanced_metadata().

        Returns:
            Flat metadata dict including storage paths and Nones for any
            standard field not yet populated.
        """
        raw_title = enhanced_metadata.get("title") or paper.get("title")
        clean_title = (
            TextNormalizer.clean_metadata_text(raw_title) if raw_title else ""
        )
        raw_abstract = None
        if enhanced_metadata.get("abstract"):
            raw_abstract = TextNormalizer.clean_metadata_text(
                enhanced_metadata["abstract"]
            )

        # Normalize the resolver's free-form "source" string to a canonical id.
        doi_source_value = None
        if doi_result and doi_result.get("source"):
            source = doi_result["source"]
            if "crossref" in source.lower():
                doi_source_value = "crossref"
            elif "semantic" in source.lower():
                doi_source_value = "semantic_scholar"
            elif "pubmed" in source.lower():
                doi_source_value = "pubmed"
            elif "openalex" in source.lower():
                doi_source_value = "openalex"
            else:
                doi_source_value = source

        # Field differing from the input paper implies the resolver supplied it.
        complete_metadata = {
            "title": clean_title,
            "title_source": (
                doi_source_value
                if enhanced_metadata.get("title") != paper.get("title")
                else "manual"
            ),
            "authors": enhanced_metadata.get("authors")
            or paper.get("authors"),
            "authors_source": (
                doi_source_value
                if enhanced_metadata.get("authors") != paper.get("authors")
                else ("manual" if paper.get("authors") else None)
            ),
            "year": enhanced_metadata.get("year") or paper.get("year"),
            "year_source": (
                doi_source_value
                if enhanced_metadata.get("year") != paper.get("year")
                else ("manual" if paper.get("year") else None)
            ),
            "journal": enhanced_metadata.get("journal")
            or paper.get("journal"),
            "journal_source": (
                doi_source_value
                if enhanced_metadata.get("journal") != paper.get("journal")
                else ("manual" if paper.get("journal") else None)
            ),
            "abstract": raw_abstract,
            "abstract_source": (
                doi_source_value if enhanced_metadata.get("abstract") else None
            ),
            "scitex_id": paper_id,
            "created_at": datetime.now().isoformat(),
            "created_by": "SciTeX Scholar",
        }

        # Extra fields only the resolver provides (never from the input paper).
        if doi_result and isinstance(doi_result, dict):
            safe_fields = [
                "publisher",
                "volume",
                "issue",
                "pages",
                "issn",
                "short_journal",
            ]
            for field in safe_fields:
                value = enhanced_metadata.get(field)
                if value is not None:
                    complete_metadata[field] = value
                    complete_metadata[f"{field}_source"] = (
                        doi_source_value or "unknown_api"
                    )

        if doi_result and doi_result.get("doi"):
            complete_metadata.update(
                {"doi": doi_result["doi"], "doi_source": doi_source_value}
            )
            logger.success(f"DOI resolved for {paper_id}: {doi_result['doi']}")
        else:
            # Mark failure explicitly so downstream tooling can retry later.
            complete_metadata.update(
                {
                    "doi": None,
                    "doi_source": None,
                    "doi_resolution_failed": True,
                }
            )
            logger.warning(
                f"DOI resolution failed for {paper_id}: {paper.get('title', '')[:40]}..."
            )

        # Guarantee a stable schema: every standard field exists, None if unknown.
        standard_fields = {
            "keywords": None,
            "references": None,
            "venue": None,
            "publisher": None,
            "volume": None,
            "issue": None,
            "pages": None,
            "issn": None,
            "short_journal": None,
        }

        missing_fields = []
        for field, default_value in standard_fields.items():
            if (
                field not in complete_metadata
                or complete_metadata[field] is None
            ):
                complete_metadata[field] = default_value
                missing_fields.append(field)

        if missing_fields:
            logger.info(
                f"Missing fields for future enhancement: {', '.join(missing_fields)}"
            )

        storage_paths = self._call_path_manager_get_storage_paths(
            paper_info={**paper, **enhanced_metadata}, collection_name="MASTER"
        )
        storage_path = storage_paths["storage_path"]

        complete_metadata.update(
            {
                "master_storage_path": str(storage_path),
                "readable_name": storage_paths["readable_name"],
                "metadata_file": str(storage_path / "metadata.json"),
            }
        )

        return complete_metadata
1128
+
1129
    def _generate_readable_name(
        self,
        comprehensive_metadata: Dict,
        master_storage_path: Path,
        authors: Optional[List[str]] = None,
        year: Optional[int] = None,
        journal: Optional[str] = None,
    ) -> str:
        """Generate readable symlink name from metadata.

        Single source of truth for symlink naming format:
        ``PDF-<id><letter>_CC-<count>_IF-<if>_<year>_<Author>_<Journal>``
        where the PDF status letter is r=running, s=successful, f=failed,
        p=pending, prefixed by a numeric id so names sort by status.

        Accepts both the flat metadata dict built during the initial save
        and the nested ``{"metadata": {...}, "container": {...}}`` structure
        read back from metadata.json.

        Args:
            comprehensive_metadata: Flat or nested metadata dict.
            master_storage_path: Paper directory in MASTER (inspected for
                PDFs, screenshots, and download marker files).
            authors: Author list; first author's surname is used.
            year: Publication year (int, digit-string, or DotDict tolerated).
            journal: Journal name; sanitized and truncated to 30 chars.

        Returns:
            The readable symlink name string.
        """
        # Extract author
        first_author = "Unknown"
        if authors and len(authors) > 0:
            author_parts = authors[0].split()
            # Last whitespace token is treated as the surname.
            first_author = (
                author_parts[-1] if len(author_parts) > 1 else author_parts[0]
            )
            first_author = "".join(
                c for c in first_author if c.isalnum() or c == "-"
            )[:20]

        # Format year (handle DotDict and other non-int types)
        from scitex.dict import DotDict

        if isinstance(year, DotDict):
            # Extract value if it's a DotDict
            year = (
                None  # Can't extract year from DotDict structure, use Unknown
            )

        # Convert to int if it's a string representation
        if isinstance(year, str) and year.isdigit():
            year = int(year)

        # Only use year if it's actually an int
        if isinstance(year, int):
            year_str = f"{year:04d}"
        else:
            year_str = "0000"

        # Clean journal name using PathManager (single source of truth)
        journal_clean = "Unknown"
        if journal:
            journal_clean = self.config.path_manager._sanitize_filename(journal)[:30]
            if not journal_clean:
                journal_clean = "Unknown"

        # Get citation count and impact factor (handle both flat and nested formats)
        # Check if this is the nested structure from file (has "metadata" key)
        if "metadata" in comprehensive_metadata:
            # Nested structure from file
            metadata_section = comprehensive_metadata.get("metadata", {})

            # Extract citation count from nested structure
            cc_val = metadata_section.get("citation_count", {})
            if isinstance(cc_val, dict):
                cc = cc_val.get("total", 0) or 0
            else:
                cc = cc_val or 0

            # Extract impact factor from nested structure
            publication_section = metadata_section.get("publication", {})
            if_val = publication_section.get("impact_factor", 0.0) or 0.0

        else:
            # Flat structure (during initial save)
            cc_val = comprehensive_metadata.get("citation_count", 0)
            if isinstance(cc_val, dict):
                cc = cc_val.get("total", 0) or 0
            else:
                cc = cc_val or 0

            # Try multiple paths for impact_factor
            if_val = (
                comprehensive_metadata.get("journal_impact_factor")
                or comprehensive_metadata.get("impact_factor")
                or comprehensive_metadata.get("publication", {}).get(
                    "impact_factor"
                )
            )
            if isinstance(if_val, dict):
                if_val = if_val.get("value", 0.0) or 0.0
            else:
                if_val = if_val or 0.0

        # Check PDF status with more granular states
        pdf_files = list(master_storage_path.glob("*.pdf"))
        screenshot_dir = master_storage_path / "screenshots"
        has_screenshots = screenshot_dir.exists() and any(
            screenshot_dir.iterdir()
        )
        # Marker files written by the downloader (presumably — confirm against
        # the download pipeline) drive the status letter below.
        downloading_marker = master_storage_path / ".downloading"
        attempted_marker = master_storage_path / ".download_attempted"

        # Extract DOI from metadata to check availability
        doi = None
        if "metadata" in comprehensive_metadata:
            # Nested structure from file
            doi = (
                comprehensive_metadata.get("metadata", {})
                .get("id", {})
                .get("doi")
            )
        else:
            # Flat structure (during initial save)
            doi = comprehensive_metadata.get("doi")

        if downloading_marker.exists():
            # Download in progress
            pdf_status_letter = "r"
        elif pdf_files:
            # Has PDF = Successful
            pdf_status_letter = "s"
        elif has_screenshots:
            # Has screenshots but no PDF = Failed (attempted but failed)
            pdf_status_letter = "f"
        elif attempted_marker.exists():
            # Download was attempted but failed early (before screenshots)
            pdf_status_letter = "f"
        elif not doi:
            # No DOI = Failed (cannot download without identifier)
            pdf_status_letter = "f"
        else:
            # No PDF, no screenshots, no attempts, has DOI = Pending (not attempted yet)
            pdf_status_letter = "p"

        # Numeric prefix makes directory listings sort by status.
        pdf_status_id_map = {
            "p": 0,
            "r": 1,
            "f": 2,
            "s": 3,
        }
        pdf_status_str = (
            f"{pdf_status_id_map[pdf_status_letter]}{pdf_status_letter}"
        )
        # Format: CC_000000-PDF_s-IF_032-2016-Author-Journal
        # PDF status: r=running, s=successful, f=failed, p=pending
        # readable_name = f"CC_{cc:06d}-PDF_{pdf_status_letter}-IF_{int(if_val):03d}-{year_str}-{first_author}-{journal_clean}"
        readable_name = f"PDF-{pdf_status_str}_CC-{cc:06d}_IF-{int(if_val):03d}_{year_str}_{first_author}_{journal_clean}"

        return readable_name
1272
+
1273
    def update_symlink(
        self,
        master_storage_path: Path,
        project: str,
        metadata: Optional[Dict] = None,
    ) -> Optional[Path]:
        """Update project symlink to reflect current paper status.

        This should be called whenever paper status changes (pending → running → success/failed).
        Generates new readable name based on current state (checking .downloading marker, PDFs, etc.)
        and updates the symlink accordingly.

        Args:
            master_storage_path: Path to paper in master library
            project: Project name
            metadata: Optional metadata dict (if not provided, will read from file)

        Returns:
            Path to the created symlink, or None if failed
        """
        try:
            # Load metadata if not provided
            if metadata is None:
                metadata_file = master_storage_path / "metadata.json"
                if metadata_file.exists():
                    # Local import; presumably redundant with a module-level
                    # json import — TODO confirm and remove if so.
                    import json

                    with open(metadata_file, "r") as f:
                        metadata = json.load(f)
                else:
                    # Without metadata there is nothing to name the link from.
                    logger.warning(
                        f"No metadata found for {master_storage_path.name}"
                    )
                    return None

            # Extract metadata from nested structure if needed
            # Metadata file has structure: {"metadata": {"basic": {...}, "id": {...}, ...}, "container": {...}}
            if "metadata" in metadata:
                # Nested structure from file
                meta_section = metadata.get("metadata", {})
                basic_section = meta_section.get("basic", {})
                pub_section = meta_section.get("publication", {})

                authors = basic_section.get("authors")
                year = basic_section.get("year")
                journal = pub_section.get("journal")
            else:
                # Flat structure (should not happen when reading from file, but handle it)
                authors = metadata.get("authors")
                year = metadata.get("year")
                journal = metadata.get("journal")

            # Generate readable name based on current state
            readable_name = self._generate_readable_name(
                comprehensive_metadata=metadata,
                master_storage_path=master_storage_path,
                authors=authors,
                year=year,
                journal=journal,
            )

            # Create/update symlink
            return self._create_project_symlink(
                master_storage_path=master_storage_path,
                project=project,
                readable_name=readable_name,
            )
        except Exception as exc_:
            # Symlink maintenance is best-effort; never propagate.
            logger.error(
                f"Failed to update symlink for {master_storage_path.name}: {exc_}"
            )
            return None
1345
+
1346
+ def _create_project_symlink(
1347
+ self, master_storage_path: Path, project: str, readable_name: str
1348
+ ) -> Optional[Path]:
1349
+ """Create symlink in project directory pointing to master storage.
1350
+
1351
+ Removes old symlinks for the same paper with different statuses
1352
+ (e.g., removes PDF_p when creating PDF_s).
1353
+ """
1354
+
1355
+ try:
1356
+ project_dir = self.config.path_manager.get_library_project_dir(project)
1357
+ symlink_path = project_dir / readable_name
1358
+
1359
+ # Extract the master ID from the target path to find old symlinks
1360
+ master_id = master_storage_path.name
1361
+
1362
+ # Remove old symlinks pointing to the same master entry
1363
+ # but with different statuses (PDF_p, PDF_f, PDF_s)
1364
+ for existing_link in project_dir.iterdir():
1365
+ if not existing_link.is_symlink():
1366
+ continue
1367
+
1368
+ # Check if this symlink points to the same master entry
1369
+ try:
1370
+ target = existing_link.resolve()
1371
+ if (
1372
+ target.name == master_id
1373
+ and existing_link.name != readable_name
1374
+ ):
1375
+ # This is an old symlink for the same paper
1376
+ logger.debug(
1377
+ f"Removing old symlink: {existing_link.name}"
1378
+ )
1379
+ existing_link.unlink()
1380
+ except Exception as e:
1381
+ # Handle broken symlinks
1382
+ logger.debug(
1383
+ f"Skipping broken symlink {existing_link.name}: {e}"
1384
+ )
1385
+ continue
1386
+
1387
+ # Create new symlink
1388
+ if not symlink_path.exists():
1389
+ relative_path = os.path.relpath(
1390
+ master_storage_path, project_dir
1391
+ )
1392
+ symlink_path.symlink_to(relative_path)
1393
+ logger.success(
1394
+ f"Created project symlink: {symlink_path} -> {relative_path}"
1395
+ )
1396
+ else:
1397
+ logger.debug(f"Project symlink already exists: {symlink_path}")
1398
+
1399
+ return symlink_path
1400
+
1401
+ except Exception as exc_:
1402
+ logger.warning(f"Failed to create project symlink: {exc_}")
1403
+ return None
1404
+
1405
    def _create_bibtex_info_structure(
        self,
        project: str,
        paper_info: Dict[str, Any],
        complete_metadata: Dict[str, Any],
        bibtex_source_filename: str = "papers",
    ) -> Optional[Path]:
        """Create info/papers_bib/pac.bib structure.

        Appends (or creates) ``info/<name>_bib/<name>.bib`` in the project
        directory and, when the paper has no DOI, drops a JSON record into
        the sibling ``unresolved/`` folder.

        Args:
            project: Project name whose info directory is updated.
            paper_info: Merged paper dict (currently unused here; kept for
                interface stability — TODO confirm with callers).
            complete_metadata: Full metadata used to build the BibTeX entry.
            bibtex_source_filename: Base name for the _bib directory/file.

        Returns:
            The info directory path, or None on any failure (best-effort).
        """
        try:
            project_dir = self.config.path_manager.get_library_project_dir(project)
            info_dir = project_dir / "info" / f"{bibtex_source_filename}_bib"
            info_dir.mkdir(parents=True, exist_ok=True)

            bibtex_file = info_dir / f"{bibtex_source_filename}.bib"
            unresolved_dir = info_dir / "unresolved"
            unresolved_dir.mkdir(parents=True, exist_ok=True)

            # Entry key = first author's surname (lowercased) + year.
            first_author = "unknown"
            if complete_metadata.get("authors"):
                authors = complete_metadata["authors"]
                if isinstance(authors, list) and authors:
                    first_author = str(authors[0]).split()[-1].lower()
                elif isinstance(authors, str):
                    first_author = authors.split()[-1].lower()

            year = complete_metadata.get("year", "unknown")
            entry_key = f"{first_author}{year}"

            bibtex_entry = self._generate_bibtex_entry(
                complete_metadata, entry_key
            )

            # Append to an existing .bib file, otherwise create it.
            if bibtex_file.exists():
                with open(bibtex_file, "a", encoding="utf-8") as file_:
                    file_.write(f"\n{bibtex_entry}")
            else:
                with open(bibtex_file, "w", encoding="utf-8") as file_:
                    file_.write(bibtex_entry)

            # No DOI: record the entry under unresolved/ for later retry.
            if not complete_metadata.get("doi"):
                unresolved_file = unresolved_dir / f"{entry_key}.json"
                unresolved_data = {
                    "title": complete_metadata.get("title", ""),
                    "authors": complete_metadata.get("authors", []),
                    "year": complete_metadata.get("year", ""),
                    "journal": complete_metadata.get("journal", ""),
                    "scholar_id": complete_metadata.get("scholar_id", ""),
                    "resolution_failed": True,
                    "timestamp": complete_metadata.get("created_at", ""),
                }
                with open(unresolved_file, "w", encoding="utf-8") as file_:
                    json.dump(unresolved_data, file_, indent=2)
                logger.info(f"Added unresolved entry: {unresolved_file}")

            logger.success(f"Updated BibTeX info structure: {bibtex_file}")
            return info_dir

        except Exception as exc_:
            # Best-effort: BibTeX bookkeeping must not break the main flow.
            logger.warning(f"Failed to create BibTeX info structure: {exc_}")
            return None
1465
+
1466
+ def _generate_bibtex_entry(
1467
+ self, metadata: Dict[str, Any], entry_key: str
1468
+ ) -> str:
1469
+ """Generate BibTeX entry from metadata."""
1470
+ entry_type = "article"
1471
+ if metadata.get("journal"):
1472
+ entry_type = "article"
1473
+ elif metadata.get("booktitle"):
1474
+ entry_type = "inproceedings"
1475
+ elif metadata.get("publisher") and not metadata.get("journal"):
1476
+ entry_type = "book"
1477
+
1478
+ bibtex = f"@{entry_type}{{{entry_key},\n"
1479
+
1480
+ field_mappings = {
1481
+ "title": "title",
1482
+ "authors": "author",
1483
+ "year": "year",
1484
+ "journal": "journal",
1485
+ "doi": "doi",
1486
+ "volume": "volume",
1487
+ "issue": "number",
1488
+ "pages": "pages",
1489
+ "publisher": "publisher",
1490
+ "booktitle": "booktitle",
1491
+ "abstract": "abstract",
1492
+ }
1493
+
1494
+ for meta_field, bibtex_field in field_mappings.items():
1495
+ value = metadata.get(meta_field)
1496
+ if value:
1497
+ if isinstance(value, list):
1498
+ value = " and ".join(str(val_) for val_ in value)
1499
+ value_escaped = (
1500
+ str(value).replace("{", "\\{").replace("}", "\\}")
1501
+ )
1502
+ bibtex += f" {bibtex_field} = {{{value_escaped}}},\n"
1503
+
1504
+ source_field = f"{meta_field}_source"
1505
+ if source_field in metadata:
1506
+ bibtex += f" % {bibtex_field}_source = {metadata[source_field]}\n"
1507
+
1508
+ bibtex += (
1509
+ f" % scholar_id = {metadata.get('scholar_id', 'unknown')},\n"
1510
+ )
1511
+ bibtex += (
1512
+ f" % created_at = {metadata.get('created_at', 'unknown')},\n"
1513
+ )
1514
+ bibtex += (
1515
+ f" % created_by = {metadata.get('created_by', 'unknown')},\n"
1516
+ )
1517
+ bibtex += "}\n"
1518
+
1519
+ return bibtex
1520
+
1521
+ # def _ensure_project_symlink(
1522
+ # self,
1523
+ # title: str,
1524
+ # year: Optional[int] = None,
1525
+ # authors: Optional[List[str]] = None,
1526
+ # paper_id: str = None,
1527
+ # master_storage_path: Path = None,
1528
+ # ) -> None:
1529
+ # """Ensure project symlink exists for paper in master library."""
1530
+ # try:
1531
+ # if not paper_id or not master_storage_path:
1532
+ # return
1533
+
1534
+ # project_lib_path = (
1535
+ # self.config.path_manager.get_scholar_library_path()
1536
+ # / self.project
1537
+ # )
1538
+ # project_lib_path.mkdir(parents=True, exist_ok=True)
1539
+
1540
+ # paper_info = {
1541
+ # "title": title,
1542
+ # "year": year,
1543
+ # "authors": authors or [],
1544
+ # }
1545
+ # readable_paths = self.config.path_manager.get_paper_storage_paths(
1546
+ # paper_info=paper_info, collection_name=self.project
1547
+ # )
1548
+ # readable_name = readable_paths["readable_name"]
1549
+ # symlink_path = project_lib_path / readable_name
1550
+
1551
+ # relative_path = f"../MASTER/{paper_id}"
1552
+ # if not symlink_path.exists():
1553
+ # symlink_path.symlink_to(relative_path)
1554
+ # logger.info(
1555
+ # f"Created project symlink: {readable_name} -> {relative_path}"
1556
+ # )
1557
+
1558
+ # except Exception as exc_:
1559
+ # logger.debug(f"Error creating project symlink: {exc_}")
1560
+
1561
    def _ensure_project_symlink(
        self,
        title: str,
        year: Optional[int] = None,
        authors: Optional[List[str]] = None,
        paper_id: Optional[str] = None,
        master_storage_path: Optional[Path] = None,
    ) -> None:
        """Ensure project symlink exists for paper in master library.

        Best-effort: computes the readable name via the path manager and
        creates ``<project>/<readable_name> -> ../MASTER/<paper_id>`` if it
        does not already exist. All errors are logged at debug level.

        Args:
            title: Paper title used to derive the readable name.
            year: Publication year, if known.
            authors: Author list, if known.
            paper_id: Paper id in MASTER; no-op when missing.
            master_storage_path: MASTER storage path; no-op when missing.
        """

        try:
            # Both identifiers are required to know what to link to.
            if not paper_id or not master_storage_path:
                return

            project_lib_path = (
                self.config.path_manager.get_scholar_library_path()
                / self.project
            )
            project_lib_path.mkdir(parents=True, exist_ok=True)

            paper_info = {
                "title": title,
                "year": year,
                "authors": authors or [],
            }
            readable_paths = self._call_path_manager_get_storage_paths(
                paper_info=paper_info, collection_name=self.project
            )
            readable_name = readable_paths["readable_name"]
            symlink_path = project_lib_path / readable_name
            # Relative target keeps the library tree relocatable.
            relative_path = f"../MASTER/{paper_id}"

            if not symlink_path.exists():
                symlink_path.symlink_to(relative_path)
                logger.info(
                    f"Created project symlink: {readable_name} -> {relative_path}"
                )
        except Exception as exc_:
            # Symlink creation is non-critical; swallow and log quietly.
            logger.debug(f"Error creating project symlink: {exc_}")
1599
+
1600
+ def _is_title_similar(
1601
+ self, title1: str, title2: str, threshold: float = 0.7
1602
+ ) -> bool:
1603
+ """Check if two titles are similar enough to be considered the same paper."""
1604
+ if not title1 or not title2:
1605
+ return False
1606
+
1607
+ def normalize_title(title: str) -> str:
1608
+ title = title.lower()
1609
+ title = re.sub(r"[^\w\s]", " ", title)
1610
+ title = re.sub(r"\s+", " ", title)
1611
+ return title.strip()
1612
+
1613
+ norm_title1 = normalize_title(title1)
1614
+ norm_title2 = normalize_title(title2)
1615
+
1616
+ words1 = set(norm_title1.split())
1617
+ words2 = set(norm_title2.split())
1618
+
1619
+ if not words1 or not words2:
1620
+ return False
1621
+
1622
+ intersection = len(words1.intersection(words2))
1623
+ union = len(words1.union(words2))
1624
+ similarity = intersection / union if union > 0 else 0.0
1625
+
1626
+ return similarity >= threshold
1627
+
1628
def update_library_metadata(
    self,
    paper_id: str,
    project: str,
    doi: str,
    metadata: Dict[str, Any],
    create_structure: bool = True,
) -> bool:
    """Update Scholar library metadata.json with resolved DOI.

    Merges ``metadata`` over any existing metadata.json content, stamps
    the DOI plus resolution bookkeeping fields, and writes the file back.
    Returns True on success, False on any failure (logged, not raised).
    """
    try:
        paper_dir = (
            self.config.path_manager.library_dir / project / paper_id
        )
        metadata_file = paper_dir / "metadata.json"

        # Optionally create the paper directory before writing into it.
        if create_structure and not paper_dir.exists():
            self.config.path_manager._ensure_directory(paper_dir)
            logger.info(f"Created Scholar library structure: {paper_dir}")

        existing: Dict[str, Any] = {}
        if metadata_file.exists():
            try:
                existing = json.loads(metadata_file.read_text())
            except Exception as exc_:
                logger.warning(f"Error loading existing metadata: {exc_}")

        # New values win over existing ones; DOI fields win over both.
        merged = dict(existing)
        merged.update(metadata)
        merged["doi"] = doi
        merged["doi_resolved_at"] = datetime.now().isoformat()
        merged["doi_source"] = "batch_doi_resolver"

        metadata_file.write_text(json.dumps(merged, indent=2))

        logger.success(f"Updated metadata for {paper_id}: DOI {doi}")
        return True

    except Exception as exc_:
        logger.error(
            f"Error updating library metadata for {paper_id}: {exc_}"
        )
        return False
1673
+
1674
def create_paper_directory_structure(
    self, paper_id: str, project: str
) -> Path:
    """Create basic paper directory structure.

    Ensures ``<library>/<project>/<paper_id>`` exists along with its
    "attachments" and "screenshots" subdirectories, and returns the
    paper directory path.
    """
    ensure_dir = self.config.path_manager._ensure_directory
    paper_dir = (
        self.config.path_manager.library_dir / project / paper_id
    )

    ensure_dir(paper_dir)
    for child in ("attachments", "screenshots"):
        ensure_dir(paper_dir / child)

    logger.info(f"Created Scholar library structure: {paper_dir}")
    return paper_dir
1689
+
1690
def validate_library_structure(self, project: str) -> Dict[str, Any]:
    """Validate existing library structure for a project.

    Returns a report dict with keys "valid", "warnings", "errors",
    "paper_count", and "missing_metadata" (paper IDs lacking a
    metadata.json). A missing project directory marks the report invalid.
    """
    report: Dict[str, Any] = {
        "valid": True,
        "warnings": [],
        "errors": [],
        "paper_count": 0,
        "missing_metadata": [],
    }

    project_dir = self.config.path_manager.library_dir / project

    if not project_dir.exists():
        report["errors"].append(
            f"Project directory does not exist: {project_dir}"
        )
        report["valid"] = False
        return report

    # Paper directories are identified by their 8-character IDs.
    for entry in project_dir.iterdir():
        if not (entry.is_dir() and len(entry.name) == 8):
            continue
        report["paper_count"] += 1
        if not (entry / "metadata.json").exists():
            report["missing_metadata"].append(entry.name)
            report["warnings"].append(
                f"Missing metadata.json: {entry.name}"
            )

    return report
1722
+
1723
def resolve_and_update_library(
    self,
    papers_with_ids: List[Dict[str, Any]],
    project: str,
    sources: Optional[List[str]] = None,
) -> Dict[str, str]:
    """Resolve DOIs and update Scholar library metadata.json files.

    For each paper dict (which must carry "paper_id" and "title"), a DOI
    is resolved via the single-DOI resolver and, on success, written into
    the paper's metadata.json. Returns a mapping of paper_id -> resolved
    DOI for the papers that were fully processed.
    """
    if not self.single_doi_resolver:
        raise ValueError(
            "SingleDOIResolver is required for resolving DOIs"
        )

    results: Dict[str, str] = {}
    for record in papers_with_ids:
        paper_id = record.get("paper_id")
        if not paper_id:
            logger.warning(
                f"Skipping paper without paper_id: {record.get('title', 'Unknown')}"
            )
            continue

        title = record.get("title")
        if not title:
            logger.warning(f"Skipping paper {paper_id} without title")
            continue

        logger.info(f"Resolving DOI for {paper_id}: {title[:50]}...")

        try:
            lookup = asyncio.run(
                self.single_doi_resolver.metadata2doi_async(
                    title=title,
                    year=record.get("year"),
                    authors=record.get("authors"),
                    sources=sources,
                )
            )

            doi = (
                lookup.get("doi")
                if lookup and isinstance(lookup, dict)
                else None
            )
            if not doi:
                logger.warning(f"⚠️ {paper_id}: No DOI found")
                continue

            # Record where each field came from alongside the values.
            year = record.get("year")
            authors = record.get("authors")
            journal = record.get("journal")
            meta_payload = {
                "title": title,
                "title_source": "input",
                "year": year,
                "year_source": "input" if year else None,
                "authors": authors,
                "authors_source": "input" if authors else None,
                "journal": journal,
                "journal_source": "input" if journal else None,
                "doi_resolution_source": lookup.get("source"),
            }

            if self.update_library_metadata(
                paper_id=paper_id,
                project=project,
                doi=doi,
                metadata=meta_payload,
            ):
                results[paper_id] = doi
                logger.success(f"✅ {paper_id}: {doi}")
            else:
                logger.error(
                    f"❌ {paper_id}: DOI resolved but metadata update failed"
                )

        except Exception as exc_:
            logger.error(f"❌ {paper_id}: Error during resolution: {exc_}")

    logger.success(
        f"Resolved {len(results)}/{len(papers_with_ids)} DOIs and updated library metadata"
    )
    return results
1804
+
1805
def resolve_and_create_library_structure(
    self,
    papers: List[Dict[str, Any]],
    project: str,
    sources: Optional[List[str]] = None,
) -> Dict[str, Dict[str, str]]:
    """Synchronous wrapper for resolve_and_create_library_structure_async.

    Runs the async implementation on a fresh event loop. Must not be
    called from inside a running event loop.

    Raises:
        RuntimeError: when invoked from an async context; callers there
            must use ``resolve_and_create_library_structure_async()``.
    """
    # BUG FIX: the previous code raised its helpful RuntimeError inside
    # a ``try`` whose own ``except RuntimeError`` swallowed it and fell
    # through to ``asyncio.run()`` — which then failed with the generic
    # "cannot be called from a running event loop" message. Probe for a
    # running loop explicitly so the intended message reaches callers
    # (still a RuntimeError, so existing handlers keep working). This
    # also avoids the deprecated ``asyncio.get_event_loop()``.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread: safe to start one.
        return asyncio.run(
            self.resolve_and_create_library_structure_async(
                papers, project, sources
            )
        )
    raise RuntimeError(
        "Cannot run synchronous version in async context. "
        "Use resolve_and_create_library_structure_async() instead."
    )
1831
+
1832
+
1833
# Explicit public API: only the manager class is exported via ``*`` imports.
__all__ = ["LibraryManager"]

# EOF