scitex 2.14.0__py3-none-any.whl → 2.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300) hide show
  1. scitex/__init__.py +71 -17
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +210 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +244 -0
  16. scitex/_mcp_tools/template.py +24 -0
  17. scitex/_mcp_tools/writer.py +21 -204
  18. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  19. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  20. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  21. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  22. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  23. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  24. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  25. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  26. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  27. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  28. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
  29. scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
  30. scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
  31. scitex/audio/README.md +40 -36
  32. scitex/audio/__init__.py +129 -61
  33. scitex/audio/_branding.py +185 -0
  34. scitex/audio/_mcp/__init__.py +32 -0
  35. scitex/audio/_mcp/handlers.py +59 -6
  36. scitex/audio/_mcp/speak_handlers.py +238 -0
  37. scitex/audio/_relay.py +225 -0
  38. scitex/audio/_tts.py +18 -10
  39. scitex/audio/engines/base.py +17 -10
  40. scitex/audio/engines/elevenlabs_engine.py +7 -2
  41. scitex/audio/mcp_server.py +228 -75
  42. scitex/canvas/README.md +1 -1
  43. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  44. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  45. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  46. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  47. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  48. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  49. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  50. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  51. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  52. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  53. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  54. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  55. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  56. scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
  57. scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
  58. scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
  59. scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
  60. scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
  61. scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
  62. scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
  63. scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
  64. scitex/canvas/editor/flask_editor/_core.py +25 -1684
  65. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  66. scitex/cli/__init__.py +38 -43
  67. scitex/cli/audio.py +76 -27
  68. scitex/cli/capture.py +13 -20
  69. scitex/cli/introspect.py +481 -0
  70. scitex/cli/main.py +200 -109
  71. scitex/cli/mcp.py +60 -34
  72. scitex/cli/plt.py +357 -0
  73. scitex/cli/repro.py +15 -8
  74. scitex/cli/resource.py +15 -8
  75. scitex/cli/scholar/__init__.py +23 -8
  76. scitex/cli/scholar/_crossref_scitex.py +296 -0
  77. scitex/cli/scholar/_fetch.py +25 -3
  78. scitex/cli/social.py +314 -0
  79. scitex/cli/stats.py +15 -8
  80. scitex/cli/template.py +129 -12
  81. scitex/cli/tex.py +15 -8
  82. scitex/cli/writer.py +132 -8
  83. scitex/cloud/__init__.py +41 -2
  84. scitex/config/README.md +1 -1
  85. scitex/config/__init__.py +16 -2
  86. scitex/config/_env_registry.py +256 -0
  87. scitex/context/__init__.py +22 -0
  88. scitex/dev/__init__.py +20 -1
  89. scitex/diagram/__init__.py +42 -19
  90. scitex/diagram/mcp_server.py +13 -125
  91. scitex/gen/__init__.py +50 -14
  92. scitex/gen/_list_packages.py +4 -4
  93. scitex/introspect/__init__.py +82 -0
  94. scitex/introspect/_call_graph.py +303 -0
  95. scitex/introspect/_class_hierarchy.py +163 -0
  96. scitex/introspect/_core.py +41 -0
  97. scitex/introspect/_docstring.py +131 -0
  98. scitex/introspect/_examples.py +113 -0
  99. scitex/introspect/_imports.py +271 -0
  100. scitex/{gen/_inspect_module.py → introspect/_list_api.py} +43 -54
  101. scitex/introspect/_mcp/__init__.py +41 -0
  102. scitex/introspect/_mcp/handlers.py +233 -0
  103. scitex/introspect/_members.py +155 -0
  104. scitex/introspect/_resolve.py +89 -0
  105. scitex/introspect/_signature.py +131 -0
  106. scitex/introspect/_source.py +80 -0
  107. scitex/introspect/_type_hints.py +172 -0
  108. scitex/io/_save.py +1 -2
  109. scitex/io/bundle/README.md +1 -1
  110. scitex/logging/_formatters.py +19 -9
  111. scitex/mcp_server.py +98 -5
  112. scitex/os/__init__.py +4 -0
  113. scitex/{gen → os}/_check_host.py +4 -5
  114. scitex/plt/__init__.py +245 -550
  115. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  116. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  117. scitex/plt/gallery/README.md +1 -1
  118. scitex/plt/utils/_hitmap/__init__.py +82 -0
  119. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  120. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  121. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  122. scitex/plt/utils/_hitmap/_constants.py +40 -0
  123. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  124. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  125. scitex/plt/utils/_hitmap/_query.py +113 -0
  126. scitex/plt/utils/_hitmap.py +46 -1616
  127. scitex/plt/utils/_metadata/__init__.py +80 -0
  128. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  129. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  130. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  131. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  132. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  133. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  134. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  135. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  136. scitex/plt/utils/_metadata/_csv.py +416 -0
  137. scitex/plt/utils/_metadata/_detect.py +225 -0
  138. scitex/plt/utils/_metadata/_legend.py +127 -0
  139. scitex/plt/utils/_metadata/_rounding.py +117 -0
  140. scitex/plt/utils/_metadata/_verification.py +202 -0
  141. scitex/schema/README.md +1 -1
  142. scitex/scholar/__init__.py +8 -0
  143. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  144. scitex/scholar/core/Scholar.py +63 -1700
  145. scitex/scholar/core/_mixins/__init__.py +36 -0
  146. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  147. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  148. scitex/scholar/core/_mixins/_loaders.py +103 -0
  149. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  150. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  151. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  152. scitex/scholar/core/_mixins/_savers.py +69 -0
  153. scitex/scholar/core/_mixins/_search.py +103 -0
  154. scitex/scholar/core/_mixins/_services.py +88 -0
  155. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  156. scitex/scholar/crossref_scitex.py +367 -0
  157. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  158. scitex/scholar/examples/00_run_all.sh +120 -0
  159. scitex/scholar/jobs/_executors.py +27 -3
  160. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  161. scitex/scholar/pdf_download/_cli.py +154 -0
  162. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  163. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  164. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  165. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  166. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  167. scitex/scholar/pipelines/_single_steps.py +71 -36
  168. scitex/scholar/storage/_LibraryManager.py +97 -1695
  169. scitex/scholar/storage/_mixins/__init__.py +30 -0
  170. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  171. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  172. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  173. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  174. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  175. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  176. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  177. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  178. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  179. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  180. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  181. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  182. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  183. scitex/security/README.md +3 -3
  184. scitex/session/README.md +1 -1
  185. scitex/session/__init__.py +26 -7
  186. scitex/session/_decorator.py +1 -1
  187. scitex/sh/README.md +1 -1
  188. scitex/sh/__init__.py +7 -4
  189. scitex/social/__init__.py +155 -0
  190. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  191. scitex/stats/_mcp/_handlers/__init__.py +31 -0
  192. scitex/stats/_mcp/_handlers/_corrections.py +113 -0
  193. scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
  194. scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
  195. scitex/stats/_mcp/_handlers/_format.py +94 -0
  196. scitex/stats/_mcp/_handlers/_normality.py +110 -0
  197. scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
  198. scitex/stats/_mcp/_handlers/_power.py +247 -0
  199. scitex/stats/_mcp/_handlers/_recommend.py +102 -0
  200. scitex/stats/_mcp/_handlers/_run_test.py +279 -0
  201. scitex/stats/_mcp/_handlers/_stars.py +48 -0
  202. scitex/stats/_mcp/handlers.py +19 -1171
  203. scitex/stats/auto/_stat_style.py +175 -0
  204. scitex/stats/auto/_style_definitions.py +411 -0
  205. scitex/stats/auto/_styles.py +22 -620
  206. scitex/stats/descriptive/__init__.py +11 -8
  207. scitex/stats/descriptive/_ci.py +39 -0
  208. scitex/stats/power/_power.py +15 -4
  209. scitex/str/__init__.py +2 -1
  210. scitex/str/_title_case.py +63 -0
  211. scitex/template/README.md +1 -1
  212. scitex/template/__init__.py +25 -10
  213. scitex/template/_code_templates.py +147 -0
  214. scitex/template/_mcp/handlers.py +81 -0
  215. scitex/template/_mcp/tool_schemas.py +55 -0
  216. scitex/template/_templates/__init__.py +51 -0
  217. scitex/template/_templates/audio.py +233 -0
  218. scitex/template/_templates/canvas.py +312 -0
  219. scitex/template/_templates/capture.py +268 -0
  220. scitex/template/_templates/config.py +43 -0
  221. scitex/template/_templates/diagram.py +294 -0
  222. scitex/template/_templates/io.py +107 -0
  223. scitex/template/_templates/module.py +53 -0
  224. scitex/template/_templates/plt.py +202 -0
  225. scitex/template/_templates/scholar.py +267 -0
  226. scitex/template/_templates/session.py +130 -0
  227. scitex/template/_templates/session_minimal.py +43 -0
  228. scitex/template/_templates/session_plot.py +67 -0
  229. scitex/template/_templates/session_stats.py +77 -0
  230. scitex/template/_templates/stats.py +323 -0
  231. scitex/template/_templates/writer.py +296 -0
  232. scitex/template/clone_writer_directory.py +5 -5
  233. scitex/ui/_backends/_email.py +10 -2
  234. scitex/ui/_backends/_webhook.py +5 -1
  235. scitex/web/_search_pubmed.py +10 -6
  236. scitex/writer/README.md +1 -1
  237. scitex/writer/_mcp/handlers.py +11 -744
  238. scitex/writer/_mcp/tool_schemas.py +5 -335
  239. scitex-2.15.2.dist-info/METADATA +648 -0
  240. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/RECORD +246 -150
  241. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  242. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  243. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  244. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  245. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  246. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  247. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  248. scitex/diagram/_compile.py +0 -312
  249. scitex/diagram/_diagram.py +0 -355
  250. scitex/diagram/_mcp/__init__.py +0 -4
  251. scitex/diagram/_mcp/handlers.py +0 -400
  252. scitex/diagram/_mcp/tool_schemas.py +0 -157
  253. scitex/diagram/_presets.py +0 -173
  254. scitex/diagram/_schema.py +0 -182
  255. scitex/diagram/_split.py +0 -278
  256. scitex/gen/_ci.py +0 -12
  257. scitex/gen/_title_case.py +0 -89
  258. scitex/plt/_mcp/__init__.py +0 -4
  259. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  260. scitex/plt/_mcp/_handlers_figure.py +0 -195
  261. scitex/plt/_mcp/_handlers_plot.py +0 -252
  262. scitex/plt/_mcp/_handlers_style.py +0 -219
  263. scitex/plt/_mcp/handlers.py +0 -74
  264. scitex/plt/_mcp/tool_schemas.py +0 -497
  265. scitex/plt/mcp_server.py +0 -231
  266. scitex/scholar/data/.gitkeep +0 -0
  267. scitex/scholar/data/README.md +0 -44
  268. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  269. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  270. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  271. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  272. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  273. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  274. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  275. scitex/scholar/data/bib_files/pac.bib +0 -698
  276. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  277. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  278. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  279. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  280. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  281. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  282. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  283. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  284. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  285. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  286. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  287. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  288. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  289. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  290. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  291. scitex/scholar/data/impact_factor.db +0 -0
  292. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  293. scitex/scholar/examples/dev.py +0 -38
  294. scitex-2.14.0.dist-info/METADATA +0 -1238
  295. /scitex/{gen → context}/_detect_environment.py +0 -0
  296. /scitex/{gen → context}/_get_notebook_path.py +0 -0
  297. /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
  298. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/WHEEL +0 -0
  299. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/entry_points.txt +0 -0
  300. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,456 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_paper_saving.py
4
+
5
+ """
6
+ Paper saving mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import re
13
+ from collections import OrderedDict
14
+ from datetime import datetime
15
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
16
+
17
+ from scitex import logging
18
+ from scitex.scholar.utils import TextNormalizer
19
+
20
+ if TYPE_CHECKING:
21
+ from scitex.scholar.core.Paper import Paper
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class PaperSavingMixin:
    """Mixin providing paper saving methods.

    The host class must supply the collaborators this mixin reaches through
    ``self`` but does not define itself: ``project``, ``config`` (exposing a
    ``path_manager``), ``dedup_manager``, and the helper methods
    ``_dotdict_to_dict``, ``_convert_to_standardized_metadata``,
    ``_generate_readable_name`` and ``_create_project_symlink``.
    """

    def save_resolved_paper(
        self,
        paper_data: Optional["Paper"] = None,
        title: Optional[str] = None,
        doi: Optional[str] = None,
        authors: Optional[List[str]] = None,
        year: Optional[int] = None,
        journal: Optional[str] = None,
        abstract: Optional[str] = None,
        volume: Optional[str] = None,
        issue: Optional[str] = None,
        pages: Optional[str] = None,
        publisher: Optional[str] = None,
        issn: Optional[str] = None,
        short_journal: Optional[str] = None,
        citation_count: Optional[int] = None,
        impact_factor: Optional[float] = None,
        doi_source: Optional[str] = None,
        title_source: Optional[str] = None,
        abstract_source: Optional[str] = None,
        authors_source: Optional[str] = None,
        year_source: Optional[str] = None,
        journal_source: Optional[str] = None,
        library_id: Optional[str] = None,
        project: Optional[str] = None,
        metadata: Optional[Dict] = None,
        bibtex_source: Optional[str] = None,
        source: Optional[str] = None,
        paper_id: Optional[str] = None,
        **kwargs,
    ) -> str:
        """Save successfully resolved paper to Scholar library.

        Writes ``metadata.json`` (UTF-8) into the paper's MASTER storage
        directory and, when ``self.project`` names a non-master project,
        attempts to create a project symlink to it.

        Parameters
        ----------
        paper_data
            Optional source object.  Either an object exposing ``metadata``
            and ``container`` attributes or a plain ``dict``; its values only
            fill in arguments that were not passed explicitly.
        title, doi, authors, year, journal, abstract, volume, issue, pages,
        publisher, issn, short_journal, citation_count, impact_factor
            Bibliographic values to store.
        doi_source, title_source, abstract_source, authors_source,
        year_source, journal_source
            Provenance labels stored next to the corresponding values.
        library_id
            Identifier to use instead of the generated / discovered one.
        metadata
            Extra metadata dict; only its ``"abstract"`` entry is consulted
            (as a fallback when ``abstract`` is empty).
        source
            Legacy alias for ``doi_source``.
        paper_id
            Legacy alias for ``library_id``.
        project, bibtex_source, **kwargs
            Accepted for call compatibility; not used by this method (the
            project actually used is ``self.project``).

        Returns
        -------
        str
            The identifier under which the paper was saved.
        """
        # Let paper_data fill in whatever the caller did not pass explicitly.
        if paper_data is not None:
            (
                title,
                doi,
                authors,
                year,
                journal,
                abstract,
                publisher,
                impact_factor,
                library_id,
            ) = self._extract_paper_data_fields(
                paper_data,
                title,
                doi,
                authors,
                year,
                journal,
                abstract,
                publisher,
                impact_factor,
                library_id,
            )

        # Handle legacy parameters
        if paper_id and not library_id:
            library_id = paper_id
        if source and not doi_source:
            doi_source = source

        # NOTE(review): a ``paper_info`` dict with journal/year/authors
        # fallbacks taken from ``metadata`` used to be assembled here, but it
        # was never read afterwards.  The dead code was removed rather than
        # wired in, because feeding fallback values into path generation could
        # change generated paper IDs - confirm the intended behaviour.

        # Check for existing paper (deduplication)
        check_metadata = {
            "doi": doi,
            "title": title,
            "authors": authors or [],
            "year": year,
        }
        existing_paper_dir = self.dedup_manager.check_for_existing_paper(check_metadata)

        if existing_paper_dir:
            logger.info(f"Found existing paper: {existing_paper_dir.name}")
            master_storage_path = existing_paper_dir
            paper_id = existing_paper_dir.name
            readable_name = None
        else:
            storage_path, readable_name, paper_id = (
                self.config.path_manager.get_paper_storage_paths(
                    doi=doi,
                    title=title,
                    authors=authors or [],
                    year=year,
                    journal=journal,
                    project="MASTER",
                )
            )
            master_storage_path = storage_path

        # An explicit library id always wins over the derived one.
        if library_id:
            paper_id = library_id

        master_metadata_file = master_storage_path / "metadata.json"

        # Best-effort load of what is already on disk; an unreadable or
        # corrupt file is treated as "no existing metadata".
        existing_metadata = {}
        if master_metadata_file.exists():
            try:
                with open(master_metadata_file, encoding="utf-8") as file_:
                    existing_metadata = json.load(file_)
            except (OSError, json.JSONDecodeError, UnicodeDecodeError):
                existing_metadata = {}

        comprehensive_metadata = self._build_comprehensive_metadata(
            existing_metadata=existing_metadata,
            title=title,
            doi=doi,
            authors=authors,
            year=year,
            journal=journal,
            abstract=abstract,
            volume=volume,
            issue=issue,
            pages=pages,
            publisher=publisher,
            issn=issn,
            short_journal=short_journal,
            citation_count=citation_count,
            impact_factor=impact_factor,
            doi_source=doi_source,
            title_source=title_source,
            abstract_source=abstract_source,
            authors_source=authors_source,
            year_source=year_source,
            journal_source=journal_source,
            paper_id=paper_id,
            master_storage_path=master_storage_path,
            master_metadata_file=master_metadata_file,
            readable_name=readable_name,
            source=source,
            metadata=metadata,
        )

        comprehensive_metadata_plain = self._dotdict_to_dict(comprehensive_metadata)
        standardized_metadata = self._convert_to_standardized_metadata(
            comprehensive_metadata_plain
        )

        final_structure = self._create_final_structure(
            standardized_metadata,
            comprehensive_metadata_plain,
            paper_id,
            master_storage_path,
            readable_name,
            master_metadata_file,
        )

        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 explicitly instead of relying on the locale.
        with open(master_metadata_file, "w", encoding="utf-8") as file_:
            json.dump(final_structure, file_, indent=2, ensure_ascii=False)

        logger.success(f"Saved paper to MASTER Scholar library: {paper_id}")

        # Create project symlink if needed
        if self.project and self.project not in ["master", "MASTER"]:
            # Symlinking is best-effort: the paper is already saved, so a
            # failure here is logged and does not abort the call.
            try:
                readable_name = self._generate_readable_name(
                    comprehensive_metadata=comprehensive_metadata,
                    master_storage_path=master_storage_path,
                    authors=authors,
                    year=year,
                    journal=journal,
                )
                self._create_project_symlink(
                    master_storage_path=master_storage_path,
                    project=self.project,
                    readable_name=readable_name,
                )
            except Exception as exc_:
                logger.error(f"Failed to create symlink for {paper_id}: {exc_}")

        return paper_id

    def _extract_paper_data_fields(
        self,
        paper_data,
        title,
        doi,
        authors,
        year,
        journal,
        abstract,
        publisher,
        impact_factor,
        library_id,
    ):
        """Extract fields from paper_data object.

        Each explicit argument is kept when truthy; otherwise the value is
        taken from ``paper_data``.  Two shapes are understood: an object with
        a ``metadata`` attribute (read via ``metadata.basic``, ``metadata.id``,
        ``metadata.publication`` and ``container.library_id``) and a plain
        ``dict`` (where ``"scitex_id"`` / ``"scholar_id"`` supply the library
        id).  Any other object leaves the arguments untouched.

        Returns
        -------
        tuple
            ``(title, doi, authors, year, journal, abstract, publisher,
            impact_factor, library_id)``.
        """
        if hasattr(paper_data, "metadata"):
            title = title or (paper_data.metadata.basic.title or "")
            doi = doi or (paper_data.metadata.id.doi or "")
            authors = authors or paper_data.metadata.basic.authors
            year = year or paper_data.metadata.basic.year
            journal = journal or paper_data.metadata.publication.journal
            abstract = abstract or paper_data.metadata.basic.abstract
            publisher = publisher or paper_data.metadata.publication.publisher
            impact_factor = (
                impact_factor or paper_data.metadata.publication.impact_factor
            )
            library_id = library_id or paper_data.container.library_id
        elif isinstance(paper_data, dict):
            title = title or paper_data.get("title", "")
            doi = doi or paper_data.get("doi", "")
            authors = authors or paper_data.get("authors", [])
            year = year or paper_data.get("year")
            journal = journal or paper_data.get("journal")
            abstract = abstract or paper_data.get("abstract")
            publisher = publisher or paper_data.get("publisher")
            impact_factor = impact_factor or paper_data.get("impact_factor")
            library_id = (
                library_id
                or paper_data.get("scitex_id")
                or paper_data.get("scholar_id")
            )
        return (
            title,
            doi,
            authors,
            year,
            journal,
            abstract,
            publisher,
            impact_factor,
            library_id,
        )

    def _initial_projects(self) -> List[str]:
        """Return the ``projects`` list for a record with no stored list.

        An unset project and both spellings of the master project
        (``"master"`` / ``"MASTER"``) yield an empty list, matching the check
        ``save_resolved_paper`` uses before creating a project symlink.
        """
        project = self.project
        if not project or project in ("master", "MASTER"):
            return []
        return [project]

    def _build_comprehensive_metadata(self, **kwargs) -> Dict[str, Any]:
        """Build comprehensive metadata dictionary.

        Expects the keyword arguments passed by ``save_resolved_paper``
        (``existing_metadata``, the bibliographic values, their ``*_source``
        labels, ``paper_id``, ``master_storage_path``,
        ``master_metadata_file``, ``readable_name``, ``source`` and
        ``metadata``); a missing key raises ``KeyError``.

        Precedence rules:

        * value fields: a key present in ``existing_metadata`` wins over the
          newly supplied value;
        * ``*_source`` fields: an explicit label wins, then the stored one,
          then ``"input"`` if a value was supplied (``title_source`` always
          defaults to ``"input"``);
        * ``updated_at`` is always refreshed.

        NOTE(review): ``save_resolved_paper`` writes a nested
        ``{"metadata": ..., "container": ...}`` document, while the lookups
        below use flat keys such as ``"title"``; for files written by that
        method these lookups would fall through to the new values - confirm
        whether that is intended.
        """
        existing_metadata = kwargs["existing_metadata"]
        title = kwargs["title"]
        abstract = kwargs["abstract"]
        metadata = kwargs["metadata"]
        source = kwargs["source"]
        doi_source = kwargs["doi_source"]

        clean_title = TextNormalizer.clean_metadata_text(
            existing_metadata.get("title", title)
        )

        # Abstract candidates in priority order: explicit argument, then the
        # extra metadata dict, then what is already stored.
        clean_abstract = None
        if abstract:
            clean_abstract = TextNormalizer.clean_metadata_text(abstract)
        elif metadata and metadata.get("abstract"):
            clean_abstract = TextNormalizer.clean_metadata_text(metadata["abstract"])
        elif existing_metadata.get("abstract"):
            clean_abstract = TextNormalizer.clean_metadata_text(
                existing_metadata["abstract"]
            )

        doi_source_value = doi_source or existing_metadata.get("doi_source")
        if not doi_source_value and source:
            doi_source_value = self._normalize_source(source)

        # One timestamp so a brand-new record has created_at == updated_at.
        now_iso = datetime.now().isoformat()

        return {
            "title": clean_title,
            "title_source": kwargs["title_source"]
            or existing_metadata.get("title_source", "input"),
            "doi": existing_metadata.get("doi", kwargs["doi"]),
            "doi_source": doi_source_value,
            "year": existing_metadata.get("year", kwargs["year"]),
            "year_source": kwargs["year_source"]
            or existing_metadata.get(
                "year_source", "input" if kwargs["year"] else None
            ),
            "authors": existing_metadata.get("authors", kwargs["authors"] or []),
            "authors_source": kwargs["authors_source"]
            or existing_metadata.get(
                "authors_source", "input" if kwargs["authors"] else None
            ),
            "journal": existing_metadata.get("journal", kwargs["journal"]),
            "journal_source": kwargs["journal_source"]
            or existing_metadata.get(
                "journal_source", "input" if kwargs["journal"] else None
            ),
            "volume": existing_metadata.get("volume", kwargs["volume"]),
            "issue": existing_metadata.get("issue", kwargs["issue"]),
            "pages": existing_metadata.get("pages", kwargs["pages"]),
            "publisher": existing_metadata.get("publisher", kwargs["publisher"]),
            "issn": existing_metadata.get("issn", kwargs["issn"]),
            "short_journal": existing_metadata.get(
                "short_journal", kwargs["short_journal"]
            ),
            "abstract": existing_metadata.get("abstract", clean_abstract),
            "abstract_source": kwargs["abstract_source"]
            or existing_metadata.get("abstract_source", "input" if abstract else None),
            "citation_count": existing_metadata.get(
                "citation_count", kwargs["citation_count"]
            ),
            "impact_factor": existing_metadata.get(
                "impact_factor", kwargs["impact_factor"]
            ),
            # "scholar_id" is consulted as an older spelling of "scitex_id".
            "scitex_id": existing_metadata.get(
                "scitex_id", existing_metadata.get("scholar_id", kwargs["paper_id"])
            ),
            "created_at": existing_metadata.get("created_at", now_iso),
            "created_by": existing_metadata.get("created_by", "SciTeX Scholar"),
            "updated_at": now_iso,
            "projects": existing_metadata.get("projects", self._initial_projects()),
            "master_storage_path": str(kwargs["master_storage_path"]),
            "readable_name": kwargs["readable_name"],
            "metadata_file": str(kwargs["master_metadata_file"]),
        }

    def _normalize_source(self, source: str) -> str:
        """Normalize legacy source parameter to standard format.

        Matching is a case-insensitive substring test, tried in the order
        crossref, semantic, pubmed, openalex; an unrecognised ``source`` is
        returned unchanged.
        """
        lowered = source.lower()
        known_sources = (
            ("crossref", "crossref"),
            ("semantic", "semantic_scholar"),
            ("pubmed", "pubmed"),
            ("openalex", "openalex"),
        )
        for needle, canonical in known_sources:
            if needle in lowered:
                return canonical
        return source

    def _create_final_structure(
        self,
        standardized_metadata,
        comprehensive_metadata_plain,
        paper_id,
        master_storage_path,
        readable_name,
        master_metadata_file,
    ) -> OrderedDict:
        """Create final structure for saving.

        Returns an ordered two-key mapping: ``"metadata"`` holds
        ``standardized_metadata`` as given, and ``"container"`` holds the
        bookkeeping entries (ids, timestamps, projects, storage paths and PDF
        download info) in a fixed key order.  Entries missing from
        ``comprehensive_metadata_plain`` become ``None`` (``[]`` for
        ``"projects"``).
        """
        return OrderedDict(
            [
                ("metadata", standardized_metadata),
                (
                    "container",
                    OrderedDict(
                        [
                            (
                                "scitex_id",
                                comprehensive_metadata_plain.get("scitex_id"),
                            ),
                            ("library_id", paper_id),
                            (
                                "created_at",
                                comprehensive_metadata_plain.get("created_at"),
                            ),
                            (
                                "created_by",
                                comprehensive_metadata_plain.get("created_by"),
                            ),
                            (
                                "updated_at",
                                comprehensive_metadata_plain.get("updated_at"),
                            ),
                            (
                                "projects",
                                comprehensive_metadata_plain.get("projects", []),
                            ),
                            ("master_storage_path", str(master_storage_path)),
                            ("readable_name", readable_name),
                            ("metadata_file", str(master_metadata_file)),
                            (
                                "pdf_downloaded_at",
                                comprehensive_metadata_plain.get("pdf_downloaded_at"),
                            ),
                            (
                                "pdf_size_bytes",
                                comprehensive_metadata_plain.get("pdf_size_bytes"),
                            ),
                        ]
                    ),
                ),
            ]
        )

    @staticmethod
    def _unresolved_file_stem(title: Optional[str]) -> str:
        """Turn a paper title into a filesystem-friendly file name stem.

        Characters other than word characters, whitespace and ``-`` are
        dropped, the result is cut to 50 characters, and runs of whitespace /
        hyphens collapse to ``_``.  A missing title, or one that is empty
        after cleaning, yields ``"untitled"``.
        """
        stem = re.sub(r"[^\w\s-]", "", title or "untitled")[:50]
        stem = re.sub(r"[-\s]+", "_", stem)
        # A title made only of punctuation would otherwise give an empty stem.
        return stem or "untitled"

    def save_unresolved_paper(
        self,
        title: str,
        year: Optional[int] = None,
        authors: Optional[List[str]] = None,
        reason: str = "DOI not found",
        bibtex_source: Optional[str] = None,
    ) -> None:
        """Save paper that couldn't be resolved to unresolved directory.

        Writes a UTF-8 JSON record to
        ``<scholar library>/<self.project>/unresolved/<stem>_<timestamp>.json``,
        creating the directory if necessary.

        Parameters
        ----------
        title
            Paper title; stored in cleaned form and used for the file name.
        year, authors
            Optional bibliographic hints stored with the record.
        reason
            Why the paper could not be resolved.
        bibtex_source
            Stored verbatim in the record.
        """
        clean_title = TextNormalizer.clean_metadata_text(title) if title else ""
        unresolved_info = {
            "title": clean_title,
            "year": year,
            "authors": authors or [],
            "reason": reason,
            "bibtex_source": bibtex_source,
            "project": self.project,
            "created_at": datetime.now().isoformat(),
            "created_by": "SciTeX Scholar",
        }

        project_lib_path = (
            self.config.path_manager.get_scholar_library_path() / self.project
        )
        unresolved_dir = project_lib_path / "unresolved"
        unresolved_dir.mkdir(parents=True, exist_ok=True)

        safe_title = self._unresolved_file_stem(title)
        # Second-resolution timestamp keeps repeated saves of one title apart.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        unresolved_file = unresolved_dir / f"{safe_title}_{timestamp}.json"

        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 explicitly instead of relying on the locale.
        with open(unresolved_file, "w", encoding="utf-8") as file_:
            json.dump(unresolved_info, file_, indent=2, ensure_ascii=False)

        logger.warning(f"Saved unresolved entry: {unresolved_file.name}")
454
+
455
+
456
+ # EOF