scitex 2.14.0__py3-none-any.whl → 2.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218)
  1. scitex/__init__.py +47 -0
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +191 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +244 -0
  16. scitex/_mcp_tools/writer.py +21 -204
  17. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  18. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  19. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  20. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  21. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  22. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  23. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  24. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  25. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  26. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  27. scitex/audio/README.md +40 -36
  28. scitex/audio/__init__.py +127 -59
  29. scitex/audio/_branding.py +185 -0
  30. scitex/audio/_mcp/__init__.py +32 -0
  31. scitex/audio/_mcp/handlers.py +59 -6
  32. scitex/audio/_mcp/speak_handlers.py +238 -0
  33. scitex/audio/_relay.py +225 -0
  34. scitex/audio/engines/elevenlabs_engine.py +6 -1
  35. scitex/audio/mcp_server.py +228 -75
  36. scitex/canvas/README.md +1 -1
  37. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  38. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  39. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  40. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  41. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  42. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  43. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  44. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  45. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  46. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  47. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  48. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  49. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  50. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  51. scitex/cli/__init__.py +38 -43
  52. scitex/cli/audio.py +76 -27
  53. scitex/cli/capture.py +13 -20
  54. scitex/cli/introspect.py +443 -0
  55. scitex/cli/main.py +198 -109
  56. scitex/cli/mcp.py +60 -34
  57. scitex/cli/scholar/__init__.py +8 -0
  58. scitex/cli/scholar/_crossref_scitex.py +296 -0
  59. scitex/cli/scholar/_fetch.py +25 -3
  60. scitex/cli/social.py +314 -0
  61. scitex/cli/writer.py +117 -0
  62. scitex/config/README.md +1 -1
  63. scitex/config/__init__.py +16 -2
  64. scitex/config/_env_registry.py +191 -0
  65. scitex/diagram/__init__.py +42 -19
  66. scitex/diagram/mcp_server.py +13 -125
  67. scitex/introspect/__init__.py +75 -0
  68. scitex/introspect/_call_graph.py +303 -0
  69. scitex/introspect/_class_hierarchy.py +163 -0
  70. scitex/introspect/_core.py +42 -0
  71. scitex/introspect/_docstring.py +131 -0
  72. scitex/introspect/_examples.py +113 -0
  73. scitex/introspect/_imports.py +271 -0
  74. scitex/introspect/_mcp/__init__.py +37 -0
  75. scitex/introspect/_mcp/handlers.py +208 -0
  76. scitex/introspect/_members.py +151 -0
  77. scitex/introspect/_resolve.py +89 -0
  78. scitex/introspect/_signature.py +131 -0
  79. scitex/introspect/_source.py +80 -0
  80. scitex/introspect/_type_hints.py +172 -0
  81. scitex/io/bundle/README.md +1 -1
  82. scitex/mcp_server.py +98 -5
  83. scitex/plt/__init__.py +248 -550
  84. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  85. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  86. scitex/plt/gallery/README.md +1 -1
  87. scitex/plt/utils/_hitmap/__init__.py +82 -0
  88. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  89. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  90. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  91. scitex/plt/utils/_hitmap/_constants.py +40 -0
  92. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  93. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  94. scitex/plt/utils/_hitmap/_query.py +113 -0
  95. scitex/plt/utils/_hitmap.py +46 -1616
  96. scitex/plt/utils/_metadata/__init__.py +80 -0
  97. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  98. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  99. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  100. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  101. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  102. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  103. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  104. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  105. scitex/plt/utils/_metadata/_csv.py +416 -0
  106. scitex/plt/utils/_metadata/_detect.py +225 -0
  107. scitex/plt/utils/_metadata/_legend.py +127 -0
  108. scitex/plt/utils/_metadata/_rounding.py +117 -0
  109. scitex/plt/utils/_metadata/_verification.py +202 -0
  110. scitex/schema/README.md +1 -1
  111. scitex/scholar/__init__.py +8 -0
  112. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  113. scitex/scholar/core/Scholar.py +63 -1700
  114. scitex/scholar/core/_mixins/__init__.py +36 -0
  115. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  116. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  117. scitex/scholar/core/_mixins/_loaders.py +103 -0
  118. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  119. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  120. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  121. scitex/scholar/core/_mixins/_savers.py +69 -0
  122. scitex/scholar/core/_mixins/_search.py +103 -0
  123. scitex/scholar/core/_mixins/_services.py +88 -0
  124. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  125. scitex/scholar/crossref_scitex.py +367 -0
  126. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  127. scitex/scholar/examples/00_run_all.sh +120 -0
  128. scitex/scholar/jobs/_executors.py +27 -3
  129. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  130. scitex/scholar/pdf_download/_cli.py +154 -0
  131. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  132. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  133. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  134. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  135. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  136. scitex/scholar/pipelines/_single_steps.py +71 -36
  137. scitex/scholar/storage/_LibraryManager.py +97 -1695
  138. scitex/scholar/storage/_mixins/__init__.py +30 -0
  139. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  140. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  141. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  142. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  143. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  144. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  145. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  146. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  147. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  148. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  149. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  150. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  151. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  152. scitex/security/README.md +3 -3
  153. scitex/session/README.md +1 -1
  154. scitex/sh/README.md +1 -1
  155. scitex/social/__init__.py +153 -0
  156. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  157. scitex/template/README.md +1 -1
  158. scitex/template/clone_writer_directory.py +5 -5
  159. scitex/writer/README.md +1 -1
  160. scitex/writer/_mcp/handlers.py +11 -744
  161. scitex/writer/_mcp/tool_schemas.py +5 -335
  162. scitex-2.15.1.dist-info/METADATA +648 -0
  163. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/RECORD +166 -111
  164. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  165. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  166. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  167. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  168. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  169. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  170. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  171. scitex/diagram/_compile.py +0 -312
  172. scitex/diagram/_diagram.py +0 -355
  173. scitex/diagram/_mcp/__init__.py +0 -4
  174. scitex/diagram/_mcp/handlers.py +0 -400
  175. scitex/diagram/_mcp/tool_schemas.py +0 -157
  176. scitex/diagram/_presets.py +0 -173
  177. scitex/diagram/_schema.py +0 -182
  178. scitex/diagram/_split.py +0 -278
  179. scitex/plt/_mcp/__init__.py +0 -4
  180. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  181. scitex/plt/_mcp/_handlers_figure.py +0 -195
  182. scitex/plt/_mcp/_handlers_plot.py +0 -252
  183. scitex/plt/_mcp/_handlers_style.py +0 -219
  184. scitex/plt/_mcp/handlers.py +0 -74
  185. scitex/plt/_mcp/tool_schemas.py +0 -497
  186. scitex/plt/mcp_server.py +0 -231
  187. scitex/scholar/data/.gitkeep +0 -0
  188. scitex/scholar/data/README.md +0 -44
  189. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  190. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  191. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  192. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  193. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  194. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  195. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  196. scitex/scholar/data/bib_files/pac.bib +0 -698
  197. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  198. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  199. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  200. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  201. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  202. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  203. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  204. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  205. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  206. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  207. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  208. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  209. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  210. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  211. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  212. scitex/scholar/data/impact_factor.db +0 -0
  213. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  214. scitex/scholar/examples/dev.py +0 -38
  215. scitex-2.14.0.dist-info/METADATA +0 -1238
  216. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/WHEEL +0 -0
  217. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/entry_points.txt +0 -0
  218. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,456 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_paper_saving.py
4
+
5
+ """
6
+ Paper saving mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import re
13
+ from collections import OrderedDict
14
+ from datetime import datetime
15
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
16
+
17
+ from scitex import logging
18
+ from scitex.scholar.utils import TextNormalizer
19
+
20
+ if TYPE_CHECKING:
21
+ from scitex.scholar.core.Paper import Paper
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class PaperSavingMixin:
    """Mixin providing paper saving methods.

    The host class must supply everything these methods read from ``self``
    but do not define here: ``project``, ``config.path_manager``,
    ``dedup_manager``, ``_dotdict_to_dict``,
    ``_convert_to_standardized_metadata``, ``_generate_readable_name`` and
    ``_create_project_symlink``.
    """

    def save_resolved_paper(
        self,
        paper_data: Optional[Paper] = None,
        title: Optional[str] = None,
        doi: Optional[str] = None,
        authors: Optional[List[str]] = None,
        year: Optional[int] = None,
        journal: Optional[str] = None,
        abstract: Optional[str] = None,
        volume: Optional[str] = None,
        issue: Optional[str] = None,
        pages: Optional[str] = None,
        publisher: Optional[str] = None,
        issn: Optional[str] = None,
        short_journal: Optional[str] = None,
        citation_count: Optional[int] = None,
        impact_factor: Optional[float] = None,
        doi_source: Optional[str] = None,
        title_source: Optional[str] = None,
        abstract_source: Optional[str] = None,
        authors_source: Optional[str] = None,
        year_source: Optional[str] = None,
        journal_source: Optional[str] = None,
        library_id: Optional[str] = None,
        project: Optional[str] = None,
        metadata: Optional[Dict] = None,
        bibtex_source: Optional[str] = None,
        source: Optional[str] = None,
        paper_id: Optional[str] = None,
        **kwargs,
    ) -> str:
        """Save successfully resolved paper to Scholar library.

        Writes ``metadata.json`` into the paper's MASTER storage directory and,
        when ``self.project`` is set to something other than
        ``"master"``/``"MASTER"``, asks the host to create a project symlink.

        Args:
            paper_data: Optional Paper-like object or dict; it only fills the
                fields handled by ``_extract_paper_data_fields`` that were not
                passed explicitly.
            title, doi, authors, year, journal, abstract, volume, issue, pages,
            publisher, issn, short_journal, citation_count, impact_factor:
                Bibliographic values to store.
            doi_source, title_source, abstract_source, authors_source,
            year_source, journal_source: Provenance labels for those values.
            library_id: ID to use for a newly stored paper instead of the one
                returned by the path manager.
            metadata: Optional dict; forwarded to
                ``_build_comprehensive_metadata``, which reads its
                ``"abstract"`` entry as a fallback.
            source: Legacy alias for ``doi_source``.
            paper_id: Legacy alias for ``library_id``.
            project, bibtex_source, **kwargs: Accepted for call compatibility;
                this method does not read them (the symlink step uses
                ``self.project``).

        Returns:
            The paper ID: the existing directory name when the deduplication
            check finds a match, otherwise ``library_id`` if given, otherwise
            the ID returned by the path manager.
        """
        # Explicit arguments win; paper_data only fills the gaps.
        if paper_data is not None:
            (
                title,
                doi,
                authors,
                year,
                journal,
                abstract,
                publisher,
                impact_factor,
                library_id,
            ) = self._extract_paper_data_fields(
                paper_data,
                title,
                doi,
                authors,
                year,
                journal,
                abstract,
                publisher,
                impact_factor,
                library_id,
            )

        # Handle legacy parameters
        if paper_id and not library_id:
            library_id = paper_id
        if source and not doi_source:
            doi_source = source

        # NOTE(review): a ``paper_info`` dict carrying journal/year/authors
        # fallbacks taken from ``metadata`` used to be built here, but nothing
        # read it, so it was removed. If that fallback is wanted, apply it to
        # the local variables before the deduplication check below.

        # Check for existing paper (deduplication)
        check_metadata = {
            "doi": doi,
            "title": title,
            "authors": authors or [],
            "year": year,
        }
        existing_paper_dir = self.dedup_manager.check_for_existing_paper(check_metadata)

        if existing_paper_dir:
            logger.info(f"Found existing paper: {existing_paper_dir.name}")
            master_storage_path = existing_paper_dir
            paper_id = existing_paper_dir.name
            readable_name = None
        else:
            storage_path, readable_name, paper_id = (
                self.config.path_manager.get_paper_storage_paths(
                    doi=doi,
                    title=title,
                    authors=authors or [],
                    year=year,
                    journal=journal,
                    project="MASTER",
                )
            )
            master_storage_path = storage_path
            if library_id:
                paper_id = library_id

        master_metadata_file = master_storage_path / "metadata.json"

        # Best-effort read of what is already stored. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError, so an unreadable file
        # degrades to "no existing metadata" instead of aborting the save.
        existing_metadata = {}
        if master_metadata_file.exists():
            try:
                with open(master_metadata_file, encoding="utf-8") as file_:
                    existing_metadata = json.load(file_)
            except (OSError, ValueError):
                existing_metadata = {}
            if not isinstance(existing_metadata, dict):
                # Later code calls .get() on this value.
                existing_metadata = {}

        comprehensive_metadata = self._build_comprehensive_metadata(
            existing_metadata=existing_metadata,
            title=title,
            doi=doi,
            authors=authors,
            year=year,
            journal=journal,
            abstract=abstract,
            volume=volume,
            issue=issue,
            pages=pages,
            publisher=publisher,
            issn=issn,
            short_journal=short_journal,
            citation_count=citation_count,
            impact_factor=impact_factor,
            doi_source=doi_source,
            title_source=title_source,
            abstract_source=abstract_source,
            authors_source=authors_source,
            year_source=year_source,
            journal_source=journal_source,
            paper_id=paper_id,
            master_storage_path=master_storage_path,
            master_metadata_file=master_metadata_file,
            readable_name=readable_name,
            source=source,
            metadata=metadata,
        )

        comprehensive_metadata_plain = self._dotdict_to_dict(comprehensive_metadata)
        standardized_metadata = self._convert_to_standardized_metadata(
            comprehensive_metadata_plain
        )

        final_structure = self._create_final_structure(
            standardized_metadata,
            comprehensive_metadata_plain,
            paper_id,
            master_storage_path,
            readable_name,
            master_metadata_file,
        )

        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 explicitly rather than in the locale's encoding.
        with open(master_metadata_file, "w", encoding="utf-8") as file_:
            json.dump(final_structure, file_, indent=2, ensure_ascii=False)

        logger.success(f"Saved paper to MASTER Scholar library: {paper_id}")

        # Create project symlink if needed. A failure here is logged only:
        # the metadata has already been written at this point.
        if self.project and self.project not in ["master", "MASTER"]:
            try:
                readable_name = self._generate_readable_name(
                    comprehensive_metadata=comprehensive_metadata,
                    master_storage_path=master_storage_path,
                    authors=authors,
                    year=year,
                    journal=journal,
                )
                self._create_project_symlink(
                    master_storage_path=master_storage_path,
                    project=self.project,
                    readable_name=readable_name,
                )
            except Exception as exc_:
                logger.error(f"Failed to create symlink for {paper_id}: {exc_}")

        return paper_id

    def _extract_paper_data_fields(
        self,
        paper_data,
        title,
        doi,
        authors,
        year,
        journal,
        abstract,
        publisher,
        impact_factor,
        library_id,
    ):
        """Fill missing fields from ``paper_data`` and return them as a tuple.

        An explicit value is kept whenever it is truthy; falsy values
        (``None``, ``""``, ``0``, ``[]``) are treated as missing. Objects with
        a ``metadata`` attribute are read through ``metadata.basic``,
        ``metadata.id``, ``metadata.publication`` and ``container``; dicts are
        read by key. Any other input leaves the values untouched.

        Returns:
            ``(title, doi, authors, year, journal, abstract, publisher,
            impact_factor, library_id)``.
        """
        if hasattr(paper_data, "metadata"):
            title = title or (paper_data.metadata.basic.title or "")
            doi = doi or (paper_data.metadata.id.doi or "")
            authors = authors or paper_data.metadata.basic.authors
            year = year or paper_data.metadata.basic.year
            journal = journal or paper_data.metadata.publication.journal
            abstract = abstract or paper_data.metadata.basic.abstract
            publisher = publisher or paper_data.metadata.publication.publisher
            impact_factor = (
                impact_factor or paper_data.metadata.publication.impact_factor
            )
            library_id = library_id or paper_data.container.library_id
        elif isinstance(paper_data, dict):
            title = title or paper_data.get("title", "")
            doi = doi or paper_data.get("doi", "")
            authors = authors or paper_data.get("authors", [])
            year = year or paper_data.get("year")
            journal = journal or paper_data.get("journal")
            abstract = abstract or paper_data.get("abstract")
            publisher = publisher or paper_data.get("publisher")
            impact_factor = impact_factor or paper_data.get("impact_factor")
            # "scholar_id" is consulted only when "scitex_id" is absent/falsy.
            library_id = (
                library_id
                or paper_data.get("scitex_id")
                or paper_data.get("scholar_id")
            )
        return (
            title,
            doi,
            authors,
            year,
            journal,
            abstract,
            publisher,
            impact_factor,
            library_id,
        )

    @staticmethod
    def _stored_or(existing_metadata: Dict[str, Any], key: str, fallback: Any) -> Any:
        """Return the stored value for ``key`` unless it is missing or ``None``.

        ``dict.get(key, default)`` returns ``None`` for a key stored as
        ``null``, which would discard a newly supplied value; here ``None`` is
        treated the same as a missing key. Other falsy values (``0``, ``""``,
        ``[]``) are kept.
        """
        stored = existing_metadata.get(key)
        return fallback if stored is None else stored

    def _build_comprehensive_metadata(self, **kwargs) -> Dict[str, Any]:
        """Build comprehensive metadata dictionary.

        Values already present (and not ``None``) in ``existing_metadata`` are
        kept; otherwise the newly supplied value is used. ``updated_at`` is
        always refreshed.

        Required keyword arguments: ``existing_metadata``, ``title``, ``doi``,
        ``authors``, ``year``, ``journal``, ``abstract``, ``volume``,
        ``issue``, ``pages``, ``publisher``, ``issn``, ``short_journal``,
        ``citation_count``, ``impact_factor``, ``doi_source``,
        ``title_source``, ``abstract_source``, ``authors_source``,
        ``year_source``, ``journal_source``, ``paper_id``,
        ``master_storage_path``, ``master_metadata_file``, ``readable_name``,
        ``source`` and ``metadata``.

        Raises:
            KeyError: If one of the required keyword arguments is missing.
        """
        existing_metadata = kwargs["existing_metadata"]
        title = kwargs["title"]
        abstract = kwargs["abstract"]
        metadata = kwargs["metadata"]
        source = kwargs["source"]
        doi_source = kwargs["doi_source"]

        def stored(key, fallback):
            # Prefer what is already on disk, ignoring null placeholders.
            return self._stored_or(existing_metadata, key, fallback)

        clean_title = TextNormalizer.clean_metadata_text(stored("title", title))

        # Candidate abstract for records that do not have one stored yet.
        clean_abstract = None
        if abstract:
            clean_abstract = TextNormalizer.clean_metadata_text(abstract)
        elif metadata and metadata.get("abstract"):
            clean_abstract = TextNormalizer.clean_metadata_text(metadata["abstract"])
        elif existing_metadata.get("abstract"):
            clean_abstract = TextNormalizer.clean_metadata_text(
                existing_metadata["abstract"]
            )

        doi_source_value = doi_source or existing_metadata.get("doi_source")
        if not doi_source_value and source:
            doi_source_value = self._normalize_source(source)

        # One timestamp so a new record gets identical created/updated times.
        now_iso = datetime.now().isoformat()

        # Same "is this the master project?" rule that save_resolved_paper
        # uses before creating a symlink; an unset project lists nothing.
        project = self.project
        if project and project not in ("master", "MASTER"):
            default_projects = [project]
        else:
            default_projects = []

        return {
            "title": clean_title,
            "title_source": kwargs["title_source"] or stored("title_source", "input"),
            "doi": stored("doi", kwargs["doi"]),
            "doi_source": doi_source_value,
            "year": stored("year", kwargs["year"]),
            "year_source": kwargs["year_source"]
            or stored("year_source", "input" if kwargs["year"] else None),
            "authors": stored("authors", kwargs["authors"] or []),
            "authors_source": kwargs["authors_source"]
            or stored("authors_source", "input" if kwargs["authors"] else None),
            "journal": stored("journal", kwargs["journal"]),
            "journal_source": kwargs["journal_source"]
            or stored("journal_source", "input" if kwargs["journal"] else None),
            "volume": stored("volume", kwargs["volume"]),
            "issue": stored("issue", kwargs["issue"]),
            "pages": stored("pages", kwargs["pages"]),
            "publisher": stored("publisher", kwargs["publisher"]),
            "issn": stored("issn", kwargs["issn"]),
            "short_journal": stored("short_journal", kwargs["short_journal"]),
            "abstract": stored("abstract", clean_abstract),
            "abstract_source": kwargs["abstract_source"]
            or stored("abstract_source", "input" if abstract else None),
            "citation_count": stored("citation_count", kwargs["citation_count"]),
            "impact_factor": stored("impact_factor", kwargs["impact_factor"]),
            # Legacy records may carry the ID under "scholar_id".
            "scitex_id": stored(
                "scitex_id", stored("scholar_id", kwargs["paper_id"])
            ),
            "created_at": stored("created_at", now_iso),
            "created_by": stored("created_by", "SciTeX Scholar"),
            "updated_at": now_iso,
            "projects": stored("projects", default_projects),
            "master_storage_path": str(kwargs["master_storage_path"]),
            "readable_name": kwargs["readable_name"],
            "metadata_file": str(kwargs["master_metadata_file"]),
        }

    def _normalize_source(self, source: str) -> str:
        """Normalize legacy source parameter to standard format.

        Matching is a case-insensitive substring test, tried in the order
        crossref, semantic, pubmed, openalex; an unrecognized ``source`` is
        returned unchanged.
        """
        lowered = source.lower()
        for needle, canonical in (
            ("crossref", "crossref"),
            ("semantic", "semantic_scholar"),
            ("pubmed", "pubmed"),
            ("openalex", "openalex"),
        ):
            if needle in lowered:
                return canonical
        return source

    def _create_final_structure(
        self,
        standardized_metadata,
        comprehensive_metadata_plain,
        paper_id,
        master_storage_path,
        readable_name,
        master_metadata_file,
    ) -> OrderedDict:
        """Create final structure for saving.

        Returns:
            An ``OrderedDict`` with two entries: ``"metadata"`` (the
            standardized metadata, passed through as-is) and ``"container"``
            (bookkeeping fields in a fixed key order). Container values that
            are read from ``comprehensive_metadata_plain`` and missing there
            come out as ``None`` (``[]`` for ``"projects"``).
        """
        lookup = comprehensive_metadata_plain.get

        container = OrderedDict()
        container["scitex_id"] = lookup("scitex_id")
        container["library_id"] = paper_id
        container["created_at"] = lookup("created_at")
        container["created_by"] = lookup("created_by")
        container["updated_at"] = lookup("updated_at")
        container["projects"] = lookup("projects", [])
        container["master_storage_path"] = str(master_storage_path)
        container["readable_name"] = readable_name
        container["metadata_file"] = str(master_metadata_file)
        container["pdf_downloaded_at"] = lookup("pdf_downloaded_at")
        container["pdf_size_bytes"] = lookup("pdf_size_bytes")

        return OrderedDict(
            [
                ("metadata", standardized_metadata),
                ("container", container),
            ]
        )

    @staticmethod
    def _safe_title_stem(title: Optional[str]) -> str:
        """Turn ``title`` into a filename stem, never returning an empty string.

        Characters other than word characters, whitespace and hyphens are
        dropped, the result is cut to 50 characters, and runs of
        hyphens/whitespace become a single underscore. A missing title, or one
        that sanitizes to nothing (e.g. only punctuation), yields
        ``"untitled"``.
        """
        stem = re.sub(r"[^\w\s-]", "", title or "untitled")[:50]
        stem = re.sub(r"[-\s]+", "_", stem)
        return stem or "untitled"

    def save_unresolved_paper(
        self,
        title: str,
        year: Optional[int] = None,
        authors: Optional[List[str]] = None,
        reason: str = "DOI not found",
        bibtex_source: Optional[str] = None,
    ) -> None:
        """Save paper that couldn't be resolved to unresolved directory.

        Writes one JSON file named ``<sanitized title>_<YYYYmmdd_HHMMSS>.json``
        into ``<scholar library>/<self.project>/unresolved``, creating the
        directory if needed.

        Args:
            title: Paper title; cleaned for the record and sanitized for the
                filename.
            year: Publication year, if known.
            authors: Author names; stored as ``[]`` when omitted.
            reason: Why the paper could not be resolved.
            bibtex_source: Stored verbatim under ``"bibtex_source"``.
        """
        clean_title = TextNormalizer.clean_metadata_text(title) if title else ""
        unresolved_info = {
            "title": clean_title,
            "year": year,
            "authors": authors or [],
            "reason": reason,
            "bibtex_source": bibtex_source,
            "project": self.project,
            "created_at": datetime.now().isoformat(),
            "created_by": "SciTeX Scholar",
        }

        project_lib_path = (
            self.config.path_manager.get_scholar_library_path() / self.project
        )
        unresolved_dir = project_lib_path / "unresolved"
        unresolved_dir.mkdir(parents=True, exist_ok=True)

        safe_title = self._safe_title_stem(title)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        unresolved_file = unresolved_dir / f"{safe_title}_{timestamp}.json"

        # ensure_ascii=False emits raw non-ASCII characters, so open the file
        # as UTF-8 explicitly rather than in the locale's encoding.
        with open(unresolved_file, "w", encoding="utf-8") as file_:
            json.dump(unresolved_info, file_, indent=2, ensure_ascii=False)

        logger.warning(f"Saved unresolved entry: {unresolved_file.name}")
454
+
455
+
456
+ # EOF