scitex 2.14.0__py3-none-any.whl → 2.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264):
  1. scitex/__init__.py +71 -17
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +210 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +27 -0
  16. scitex/_mcp_tools/template.py +24 -0
  17. scitex/_mcp_tools/writer.py +17 -210
  18. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  19. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  20. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  21. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  22. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  23. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  24. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  25. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  26. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  27. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  28. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
  29. scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
  30. scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
  31. scitex/audio/README.md +40 -36
  32. scitex/audio/__init__.py +129 -61
  33. scitex/audio/_branding.py +185 -0
  34. scitex/audio/_mcp/__init__.py +32 -0
  35. scitex/audio/_mcp/handlers.py +59 -6
  36. scitex/audio/_mcp/speak_handlers.py +238 -0
  37. scitex/audio/_relay.py +225 -0
  38. scitex/audio/_tts.py +18 -10
  39. scitex/audio/engines/base.py +17 -10
  40. scitex/audio/engines/elevenlabs_engine.py +7 -2
  41. scitex/audio/mcp_server.py +228 -75
  42. scitex/canvas/README.md +1 -1
  43. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  44. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  45. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  46. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  47. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  48. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  49. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  50. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  51. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  52. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  53. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  54. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  55. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  56. scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
  57. scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
  58. scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
  59. scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
  60. scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
  61. scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
  62. scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
  63. scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
  64. scitex/canvas/editor/flask_editor/_core.py +25 -1684
  65. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  66. scitex/cli/__init__.py +38 -43
  67. scitex/cli/audio.py +160 -41
  68. scitex/cli/capture.py +133 -20
  69. scitex/cli/introspect.py +488 -0
  70. scitex/cli/main.py +200 -109
  71. scitex/cli/mcp.py +60 -34
  72. scitex/cli/plt.py +414 -0
  73. scitex/cli/repro.py +15 -8
  74. scitex/cli/resource.py +15 -8
  75. scitex/cli/scholar/__init__.py +154 -8
  76. scitex/cli/scholar/_crossref_scitex.py +296 -0
  77. scitex/cli/scholar/_fetch.py +25 -3
  78. scitex/cli/social.py +355 -0
  79. scitex/cli/stats.py +136 -11
  80. scitex/cli/template.py +129 -12
  81. scitex/cli/tex.py +15 -8
  82. scitex/cli/writer.py +49 -299
  83. scitex/cloud/__init__.py +41 -2
  84. scitex/config/README.md +1 -1
  85. scitex/config/__init__.py +16 -2
  86. scitex/config/_env_registry.py +256 -0
  87. scitex/context/__init__.py +22 -0
  88. scitex/dev/__init__.py +20 -1
  89. scitex/diagram/__init__.py +42 -19
  90. scitex/diagram/mcp_server.py +13 -125
  91. scitex/gen/__init__.py +50 -14
  92. scitex/gen/_list_packages.py +4 -4
  93. scitex/introspect/__init__.py +82 -0
  94. scitex/introspect/_call_graph.py +303 -0
  95. scitex/introspect/_class_hierarchy.py +163 -0
  96. scitex/introspect/_core.py +41 -0
  97. scitex/introspect/_docstring.py +131 -0
  98. scitex/introspect/_examples.py +113 -0
  99. scitex/introspect/_imports.py +271 -0
  100. scitex/{gen/_inspect_module.py → introspect/_list_api.py} +48 -56
  101. scitex/introspect/_mcp/__init__.py +41 -0
  102. scitex/introspect/_mcp/handlers.py +233 -0
  103. scitex/introspect/_members.py +155 -0
  104. scitex/introspect/_resolve.py +89 -0
  105. scitex/introspect/_signature.py +131 -0
  106. scitex/introspect/_source.py +80 -0
  107. scitex/introspect/_type_hints.py +172 -0
  108. scitex/io/_save.py +1 -2
  109. scitex/io/bundle/README.md +1 -1
  110. scitex/logging/_formatters.py +19 -9
  111. scitex/mcp_server.py +98 -5
  112. scitex/os/__init__.py +4 -0
  113. scitex/{gen → os}/_check_host.py +4 -5
  114. scitex/plt/__init__.py +245 -550
  115. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  116. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  117. scitex/plt/gallery/README.md +1 -1
  118. scitex/plt/utils/_hitmap/__init__.py +82 -0
  119. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  120. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  121. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  122. scitex/plt/utils/_hitmap/_constants.py +40 -0
  123. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  124. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  125. scitex/plt/utils/_hitmap/_query.py +113 -0
  126. scitex/plt/utils/_hitmap.py +46 -1616
  127. scitex/plt/utils/_metadata/__init__.py +80 -0
  128. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  129. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  130. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  131. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  132. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  133. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  134. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  135. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  136. scitex/plt/utils/_metadata/_csv.py +416 -0
  137. scitex/plt/utils/_metadata/_detect.py +225 -0
  138. scitex/plt/utils/_metadata/_legend.py +127 -0
  139. scitex/plt/utils/_metadata/_rounding.py +117 -0
  140. scitex/plt/utils/_metadata/_verification.py +202 -0
  141. scitex/schema/README.md +1 -1
  142. scitex/scholar/__init__.py +8 -0
  143. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  144. scitex/scholar/core/Scholar.py +63 -1700
  145. scitex/scholar/core/_mixins/__init__.py +36 -0
  146. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  147. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  148. scitex/scholar/core/_mixins/_loaders.py +103 -0
  149. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  150. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  151. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  152. scitex/scholar/core/_mixins/_savers.py +69 -0
  153. scitex/scholar/core/_mixins/_search.py +103 -0
  154. scitex/scholar/core/_mixins/_services.py +88 -0
  155. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  156. scitex/scholar/crossref_scitex.py +367 -0
  157. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  158. scitex/scholar/examples/00_run_all.sh +120 -0
  159. scitex/scholar/jobs/_executors.py +27 -3
  160. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  161. scitex/scholar/pdf_download/_cli.py +154 -0
  162. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  163. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  164. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  165. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  166. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  167. scitex/scholar/pipelines/_single_steps.py +71 -36
  168. scitex/scholar/storage/_LibraryManager.py +97 -1695
  169. scitex/scholar/storage/_mixins/__init__.py +30 -0
  170. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  171. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  172. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  173. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  174. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  175. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  176. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  177. scitex/security/README.md +3 -3
  178. scitex/session/README.md +1 -1
  179. scitex/session/__init__.py +26 -7
  180. scitex/session/_decorator.py +1 -1
  181. scitex/sh/README.md +1 -1
  182. scitex/sh/__init__.py +7 -4
  183. scitex/social/__init__.py +155 -0
  184. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  185. scitex/stats/_mcp/_handlers/__init__.py +31 -0
  186. scitex/stats/_mcp/_handlers/_corrections.py +113 -0
  187. scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
  188. scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
  189. scitex/stats/_mcp/_handlers/_format.py +94 -0
  190. scitex/stats/_mcp/_handlers/_normality.py +110 -0
  191. scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
  192. scitex/stats/_mcp/_handlers/_power.py +247 -0
  193. scitex/stats/_mcp/_handlers/_recommend.py +102 -0
  194. scitex/stats/_mcp/_handlers/_run_test.py +279 -0
  195. scitex/stats/_mcp/_handlers/_stars.py +48 -0
  196. scitex/stats/_mcp/handlers.py +19 -1171
  197. scitex/stats/auto/_stat_style.py +175 -0
  198. scitex/stats/auto/_style_definitions.py +411 -0
  199. scitex/stats/auto/_styles.py +22 -620
  200. scitex/stats/descriptive/__init__.py +11 -8
  201. scitex/stats/descriptive/_ci.py +39 -0
  202. scitex/stats/power/_power.py +15 -4
  203. scitex/str/__init__.py +2 -1
  204. scitex/str/_title_case.py +63 -0
  205. scitex/template/README.md +1 -1
  206. scitex/template/__init__.py +25 -10
  207. scitex/template/_code_templates.py +147 -0
  208. scitex/template/_mcp/handlers.py +81 -0
  209. scitex/template/_mcp/tool_schemas.py +55 -0
  210. scitex/template/_templates/__init__.py +51 -0
  211. scitex/template/_templates/audio.py +233 -0
  212. scitex/template/_templates/canvas.py +312 -0
  213. scitex/template/_templates/capture.py +268 -0
  214. scitex/template/_templates/config.py +43 -0
  215. scitex/template/_templates/diagram.py +294 -0
  216. scitex/template/_templates/io.py +107 -0
  217. scitex/template/_templates/module.py +53 -0
  218. scitex/template/_templates/plt.py +202 -0
  219. scitex/template/_templates/scholar.py +267 -0
  220. scitex/template/_templates/session.py +130 -0
  221. scitex/template/_templates/session_minimal.py +43 -0
  222. scitex/template/_templates/session_plot.py +67 -0
  223. scitex/template/_templates/session_stats.py +77 -0
  224. scitex/template/_templates/stats.py +323 -0
  225. scitex/template/_templates/writer.py +296 -0
  226. scitex/template/clone_writer_directory.py +5 -5
  227. scitex/ui/_backends/_email.py +10 -2
  228. scitex/ui/_backends/_webhook.py +5 -1
  229. scitex/web/_search_pubmed.py +10 -6
  230. scitex/writer/README.md +1 -1
  231. scitex/writer/__init__.py +43 -34
  232. scitex/writer/_mcp/handlers.py +11 -744
  233. scitex/writer/_mcp/tool_schemas.py +5 -335
  234. scitex-2.15.3.dist-info/METADATA +667 -0
  235. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/RECORD +241 -120
  236. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  237. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  238. scitex/diagram/_compile.py +0 -312
  239. scitex/diagram/_diagram.py +0 -355
  240. scitex/diagram/_mcp/__init__.py +0 -4
  241. scitex/diagram/_mcp/handlers.py +0 -400
  242. scitex/diagram/_mcp/tool_schemas.py +0 -157
  243. scitex/diagram/_presets.py +0 -173
  244. scitex/diagram/_schema.py +0 -182
  245. scitex/diagram/_split.py +0 -278
  246. scitex/gen/_ci.py +0 -12
  247. scitex/gen/_title_case.py +0 -89
  248. scitex/plt/_mcp/__init__.py +0 -4
  249. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  250. scitex/plt/_mcp/_handlers_figure.py +0 -195
  251. scitex/plt/_mcp/_handlers_plot.py +0 -252
  252. scitex/plt/_mcp/_handlers_style.py +0 -219
  253. scitex/plt/_mcp/handlers.py +0 -74
  254. scitex/plt/_mcp/tool_schemas.py +0 -497
  255. scitex/plt/mcp_server.py +0 -231
  256. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  257. scitex/scholar/examples/dev.py +0 -38
  258. scitex-2.14.0.dist-info/METADATA +0 -1238
  259. /scitex/{gen → context}/_detect_environment.py +0 -0
  260. /scitex/{gen → context}/_get_notebook_path.py +0 -0
  261. /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
  262. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/WHEEL +0 -0
  263. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/entry_points.txt +0 -0
  264. {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,376 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_resolution.py
4
+
5
+ """
6
+ DOI resolution mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import re
13
+ from datetime import datetime
14
+ from typing import Any, Dict, List, Optional
15
+
16
+ from scitex import logging
17
+ from scitex.scholar.utils import TextNormalizer
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class ResolutionMixin:
    """Mixin providing DOI resolution methods.

    The host class is expected to supply the attributes and helpers this
    mixin reaches through ``self``: ``library_master_dir``,
    ``single_doi_resolver``, ``_call_path_manager_get_storage_paths``,
    ``_create_project_symlink`` and ``_create_bibtex_info_structure``.
    """

    @staticmethod
    def _is_year_compatible(year: Any, stored_year: Any) -> bool:
        """Return True when two publication years do not contradict each other.

        A missing year on either side is treated as compatible. Numeric years
        may differ by at most one; anything that cannot be converted to ``int``
        falls back to plain equality.
        """
        if not year or not stored_year:
            return True
        try:
            return abs(int(stored_year) - int(year)) <= 1
        except (TypeError, ValueError):
            return stored_year == year

    @staticmethod
    def _field_source(
        enhanced_value: Any,
        paper_value: Any,
        doi_source_value: Optional[str],
        default: Optional[str] = None,
    ) -> Optional[str]:
        """Decide which source label to record for one metadata field.

        The resolver is credited only when it actually supplied a value that
        differs from the caller's. Otherwise the value originates from the
        input paper and is labelled ``"manual"``; ``default`` is returned when
        the paper has no value either.
        """
        if enhanced_value and enhanced_value != paper_value:
            return doi_source_value
        return "manual" if paper_value else default

    def check_library_for_doi(
        self, title: str, year: Optional[int] = None
    ) -> Optional[str]:
        """Check if DOI already exists in master Scholar library.

        Scans every ``metadata.json`` below ``self.library_master_dir`` and
        returns the stored DOI of the first entry whose title is similar to
        ``title`` and whose year is compatible with ``year``.

        Returns:
            The stored DOI, or ``None`` when nothing matches or the library
            cannot be scanned.
        """
        try:
            for paper_dir in self.library_master_dir.iterdir():
                if not paper_dir.is_dir():
                    continue

                metadata_file = paper_dir / "metadata.json"
                if not metadata_file.exists():
                    continue

                try:
                    with open(metadata_file) as file_:
                        metadata = json.load(file_)

                    stored_title = metadata.get("title", "")
                    stored_year = metadata.get("year")
                    stored_doi = metadata.get("doi")

                    title_match = self._is_title_similar(title, stored_title)
                    year_match = self._is_year_compatible(year, stored_year)

                    if title_match and year_match and stored_doi:
                        logger.info(
                            f"DOI found in master Scholar library: {stored_doi} (paper_id: {paper_dir.name})"
                        )
                        return stored_doi

                except (
                    json.JSONDecodeError,
                    KeyError,
                    ValueError,
                    TypeError,
                    AttributeError,
                    OSError,
                ) as exc_:
                    # One unreadable or malformed entry must not abort the
                    # scan of the remaining library entries.
                    logger.debug(
                        f"Error reading metadata from {metadata_file}: {exc_}"
                    )
                    continue

            return None

        except Exception as exc_:
            # Best-effort lookup: callers treat None as "not in library".
            logger.debug(f"Error checking master Scholar library: {exc_}")
            return None

    async def resolve_and_create_library_structure_async(
        self,
        papers: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
    ) -> Dict[str, Dict[str, str]]:
        """Resolve DOIs and create full Scholar library structure with proper paths.

        For each paper that has a title: resolve its DOI, write
        ``metadata.json`` into the master storage path, create the project
        symlink and the BibTeX info structure. A failure on one paper is
        logged and does not stop the remaining papers.

        Returns:
            Mapping of input title to a dict describing the created entry
            (ids, DOI, storage paths). Papers that failed are absent.

        Raises:
            ValueError: If ``self.single_doi_resolver`` is not set.
        """
        if not self.single_doi_resolver:
            raise ValueError("SingleDOIResolver is required for resolving DOIs")

        results = {}
        for paper in papers:
            title = paper.get("title")
            if not title:
                logger.warning(f"Skipping paper without title: {paper}")
                continue

            logger.info(f"Processing: {title[:50]}...")

            try:
                doi_result = await self.single_doi_resolver.metadata2doi_async(
                    title=title,
                    year=paper.get("year"),
                    authors=paper.get("authors"),
                    sources=sources,
                )

                enhanced_metadata = self._extract_enhanced_metadata(doi_result, paper)
                paper_info = {**paper, **enhanced_metadata}

                storage_paths = self._call_path_manager_get_storage_paths(
                    paper_info=paper_info, collection_name="MASTER"
                )
                paper_id = storage_paths["unique_id"]
                storage_path = storage_paths["storage_path"]
                metadata_file = storage_path / "metadata.json"

                complete_metadata = self._create_complete_metadata(
                    paper, doi_result, paper_id, enhanced_metadata
                )

                with open(metadata_file, "w") as file_:
                    json.dump(complete_metadata, file_, indent=2)

                logger.success(
                    f"Saved metadata.json for {paper_id} ({len(complete_metadata)} fields)"
                )

                project_symlink_path = self._create_project_symlink(
                    master_storage_path=storage_path,
                    project=project,
                    readable_name=storage_paths["readable_name"],
                )

                # Set by resolve_and_create_library_structure_with_source_async;
                # defaults to "papers" when that wrapper was never used.
                bibtex_source_filename = getattr(self, "_source_filename", "papers")
                info_dir = self._create_bibtex_info_structure(
                    project=project,
                    paper_info={**paper, **enhanced_metadata},
                    complete_metadata=complete_metadata,
                    bibtex_source_filename=bibtex_source_filename,
                )

                results[title] = {
                    "scitex_id": paper_id,
                    "scholar_id": paper_id,
                    "doi": complete_metadata.get("doi"),
                    "master_storage_path": str(storage_path),
                    "project_symlink_path": str(project_symlink_path)
                    if project_symlink_path
                    else None,
                    "readable_name": storage_paths["readable_name"],
                    "metadata_file": str(metadata_file),
                    "info_dir": str(info_dir) if info_dir else None,
                }

                logger.info(f"Created library entry: {paper_id}")
                if complete_metadata.get("doi"):
                    logger.info(f" DOI: {complete_metadata['doi']}")
                logger.info(f" Storage: {storage_path}")

            except Exception as exc_:
                # Per-paper boundary: log and carry on with the next paper.
                logger.error(f"Error processing '{title[:30]}...': {exc_}")

        logger.success(
            f"Created Scholar library entries for {len(results)}/{len(papers)} papers"
        )
        return results

    async def resolve_and_create_library_structure_with_source_async(
        self,
        papers: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
        bibtex_source_filename: str = "papers",
    ) -> Dict[str, Dict[str, str]]:
        """Enhanced version that passes source filename for BibTeX structure.

        NOTE(review): the filename is stored on the instance as
        ``_source_filename`` and is not reset afterwards, so it also applies to
        later direct calls of ``resolve_and_create_library_structure_async``.
        """
        self._source_filename = bibtex_source_filename
        return await self.resolve_and_create_library_structure_async(
            papers=papers, project=project, sources=sources
        )

    def _extract_enhanced_metadata(
        self, doi_result: Optional[Dict], paper: Dict
    ) -> Dict[str, Any]:
        """Extract enhanced metadata from DOI resolution result.

        Each field is taken from ``doi_result["metadata"]`` first, then from
        the top level of ``doi_result``. Only journal, authors, year and title
        additionally fall back to the input ``paper``.

        Returns:
            An empty dict when ``doi_result`` is falsy or not a dict,
            otherwise a dict containing every known field (possibly ``None``).
        """
        enhanced: Dict[str, Any] = {}
        if not (doi_result and isinstance(doi_result, dict)):
            return enhanced

        # "metadata" may be present but null; treat that like a missing key.
        metadata_source = doi_result.get("metadata") or {}
        if not isinstance(metadata_source, dict):
            metadata_source = {}

        def pick(field: str, use_paper: bool = False) -> Any:
            value = metadata_source.get(field) or doi_result.get(field)
            if use_paper:
                value = value or paper.get(field)
            return value

        enhanced.update(
            {
                "doi": doi_result.get("doi"),
                "journal": pick("journal", use_paper=True),
                "authors": pick("authors", use_paper=True),
                "year": pick("year", use_paper=True),
                "title": pick("title", use_paper=True),
                "abstract": pick("abstract"),
                "publisher": pick("publisher"),
                "volume": pick("volume"),
                "issue": pick("issue"),
                "pages": pick("pages"),
                "issn": pick("issn"),
                "short_journal": pick("short_journal"),
            }
        )

        if doi_result.get("doi"):
            logger.success(
                f"Enhanced metadata from DOI source: {dict(metadata_source)}"
            )

        return enhanced

    def _create_complete_metadata(
        self,
        paper: Dict,
        doi_result: Optional[Dict],
        paper_id: str,
        enhanced_metadata: Dict,
    ) -> Dict[str, Any]:
        """Create complete metadata dictionary with source tracking.

        Combines the input ``paper`` with ``enhanced_metadata``, records a
        ``*_source`` label per field, flags failed DOI resolution, fills the
        standard fields with ``None`` defaults and appends the storage paths
        obtained from the path manager.
        """
        raw_title = enhanced_metadata.get("title") or paper.get("title")
        clean_title = TextNormalizer.clean_metadata_text(raw_title) if raw_title else ""
        raw_abstract = None
        if enhanced_metadata.get("abstract"):
            raw_abstract = TextNormalizer.clean_metadata_text(
                enhanced_metadata["abstract"]
            )

        has_result = bool(doi_result) and isinstance(doi_result, dict)
        doi_source_value = self._get_doi_source_value(doi_result)

        def source_of(field: str, default: Optional[str] = None) -> Optional[str]:
            return self._field_source(
                enhanced_metadata.get(field),
                paper.get(field),
                doi_source_value,
                default,
            )

        complete_metadata = {
            "title": clean_title,
            "title_source": source_of("title", default="manual"),
            "authors": enhanced_metadata.get("authors") or paper.get("authors"),
            "authors_source": source_of("authors"),
            "year": enhanced_metadata.get("year") or paper.get("year"),
            "year_source": source_of("year"),
            "journal": enhanced_metadata.get("journal") or paper.get("journal"),
            "journal_source": source_of("journal"),
            "abstract": raw_abstract,
            "abstract_source": doi_source_value
            if enhanced_metadata.get("abstract")
            else None,
            "scitex_id": paper_id,
            "created_at": datetime.now().isoformat(),
            "created_by": "SciTeX Scholar",
        }

        if has_result:
            safe_fields = [
                "publisher",
                "volume",
                "issue",
                "pages",
                "issn",
                "short_journal",
            ]
            for field in safe_fields:
                value = enhanced_metadata.get(field)
                if value is not None:
                    complete_metadata[field] = value
                    complete_metadata[f"{field}_source"] = (
                        doi_source_value or "unknown_api"
                    )

        if has_result and doi_result.get("doi"):
            complete_metadata.update(
                {"doi": doi_result["doi"], "doi_source": doi_source_value}
            )
            logger.success(f"DOI resolved for {paper_id}: {doi_result['doi']}")
        else:
            complete_metadata.update(
                {"doi": None, "doi_source": None, "doi_resolution_failed": True}
            )
            # Guard against an explicit ``"title": None`` in the input paper.
            title_preview = (paper.get("title") or "")[:40]
            logger.warning(
                f"DOI resolution failed for {paper_id}: {title_preview}..."
            )

        self._add_standard_fields(complete_metadata)

        storage_paths = self._call_path_manager_get_storage_paths(
            paper_info={**paper, **enhanced_metadata}, collection_name="MASTER"
        )
        storage_path = storage_paths["storage_path"]

        complete_metadata.update(
            {
                "master_storage_path": str(storage_path),
                "readable_name": storage_paths["readable_name"],
                "metadata_file": str(storage_path / "metadata.json"),
            }
        )

        return complete_metadata

    def _get_doi_source_value(self, doi_result: Optional[Dict]) -> Optional[str]:
        """Get normalized DOI source value.

        Maps the free-form ``doi_result["source"]`` onto one of the canonical
        labels by case-insensitive substring match. Unrecognised sources are
        returned unchanged; a missing result or source yields ``None``.
        """
        if not doi_result or not isinstance(doi_result, dict):
            return None

        source = doi_result.get("source")
        if not source:
            return None

        lowered = source.lower()
        # Order matters only if a source string contains several markers.
        known_sources = (
            ("crossref", "crossref"),
            ("semantic", "semantic_scholar"),
            ("pubmed", "pubmed"),
            ("openalex", "openalex"),
        )
        for marker, label in known_sources:
            if marker in lowered:
                return label
        return source

    def _add_standard_fields(self, complete_metadata: Dict) -> None:
        """Add standard fields with None defaults.

        Mutates ``complete_metadata`` in place and logs which standard fields
        were absent or ``None``.
        """
        standard_fields = {
            "keywords": None,
            "references": None,
            "venue": None,
            "publisher": None,
            "volume": None,
            "issue": None,
            "pages": None,
            "issn": None,
            "short_journal": None,
        }

        missing_fields = []
        for field, default_value in standard_fields.items():
            if complete_metadata.get(field) is None:
                complete_metadata[field] = default_value
                missing_fields.append(field)

        if missing_fields:
            logger.info(
                f"Missing fields for future enhancement: {', '.join(missing_fields)}"
            )

    def _is_title_similar(
        self, title1: str, title2: str, threshold: float = 0.7
    ) -> bool:
        """Check if two titles are similar enough to be considered the same paper.

        Titles are lower-cased, stripped of punctuation and compared as word
        sets; the ratio of shared words to all distinct words must reach
        ``threshold``. Empty titles never match.
        """
        if not title1 or not title2:
            return False

        def normalize_title(title: str) -> str:
            title = title.lower()
            title = re.sub(r"[^\w\s]", " ", title)
            title = re.sub(r"\s+", " ", title)
            return title.strip()

        words1 = set(normalize_title(title1).split())
        words2 = set(normalize_title(title2).split())

        # A title made only of punctuation normalizes to no words at all.
        if not words1 or not words2:
            return False

        similarity = len(words1 & words2) / len(words1 | words2)
        return similarity >= threshold
374
+
375
+
376
+ # EOF
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_storage_helpers.py
4
+
5
+ """
6
+ Storage helper mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from datetime import datetime
13
+ from typing import TYPE_CHECKING, Dict, Optional
14
+
15
+ from scitex import logging
16
+
17
+ if TYPE_CHECKING:
18
+ from scitex.scholar.core.Paper import Paper
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class StorageHelpersMixin:
    """Mixin providing storage helper methods.

    The host class is expected to supply ``library_master_dir``; it is
    combined with paper ids through the ``/`` operator.
    """

    def has_metadata(self, paper_id: str) -> bool:
        """Check if metadata.json exists for paper."""
        metadata_file = self.library_master_dir / paper_id / "metadata.json"
        return metadata_file.exists()

    def has_urls(self, paper_id: str) -> bool:
        """Check if PDF URLs exist in metadata.

        Looks up ``metadata -> url -> pdfs`` inside the paper's
        ``metadata.json``. Any read or structure error counts as "no URLs".
        """
        if not self.has_metadata(paper_id):
            return False

        metadata_file = self.library_master_dir / paper_id / "metadata.json"
        try:
            with open(metadata_file) as f:
                data = json.load(f)

            urls = data.get("metadata", {}).get("url", {}).get("pdfs", [])
            return len(urls) > 0
        except Exception:
            # Deliberately best-effort: an unreadable file has no usable URLs.
            return False

    def has_pdf(self, paper_id: str) -> bool:
        """Check if PDF file exists in storage.

        Only files directly inside the paper directory that match ``*.pdf``
        are considered.
        """
        paper_dir = self.library_master_dir / paper_id
        if not paper_dir.exists():
            return False

        return any(paper_dir.glob("*.pdf"))

    def load_paper_from_id(self, paper_id: str) -> Optional["Paper"]:
        """Load Paper object from storage by ID.

        Returns:
            The ``Paper`` built from ``metadata.json``, or ``None`` when the
            file is missing or cannot be loaded (the failure is logged).
        """
        # Imported lazily; at module level Paper is only imported for typing.
        from scitex.scholar.core.Paper import Paper

        metadata_file = self.library_master_dir / paper_id / "metadata.json"

        if not metadata_file.exists():
            return None

        try:
            with open(metadata_file) as f:
                data = json.load(f)

            return Paper.from_dict(data)

        except Exception as e:
            logger.error(f"Failed to load paper {paper_id}: {e}")
            return None

    def save_paper_incremental(self, paper_id: str, paper: "Paper") -> None:
        """Save Paper object to storage (incremental update).

        Merges ``paper.model_dump()`` into any existing ``metadata.json`` via
        ``_merge_metadata``, stamps ``container.updated_at`` and writes the
        result back.

        Raises:
            TypeError: If the merged metadata is not JSON serializable. The
                file on disk is left untouched in that case.
        """
        storage_path = self.library_master_dir / paper_id
        storage_path.mkdir(parents=True, exist_ok=True)

        metadata_file = storage_path / "metadata.json"

        existing_data = {}
        if metadata_file.exists():
            try:
                with open(metadata_file) as f:
                    existing_data = json.load(f)
            except Exception as e:
                # Still best-effort, but no longer silent: the unreadable
                # content is about to be replaced.
                logger.warning(
                    f"Could not read existing metadata for {paper_id}; "
                    f"it will be overwritten: {e}"
                )
            if not isinstance(existing_data, dict):
                logger.warning(
                    f"Existing metadata for {paper_id} is not a JSON object; "
                    "it will be overwritten"
                )
                existing_data = {}

        new_data = paper.model_dump()
        merged_data = self._merge_metadata(existing_data, new_data)

        # "container" may be absent or null after the merge.
        if not isinstance(merged_data.get("container"), dict):
            merged_data["container"] = {}
        merged_data["container"]["updated_at"] = datetime.now().isoformat()

        # Serialize before opening the file: opening with "w" truncates it,
        # so a serialization error must surface first to avoid data loss.
        serialized = json.dumps(merged_data, indent=2, ensure_ascii=False)
        with open(metadata_file, "w") as f:
            f.write(serialized)

        logger.debug(f"Saved paper {paper_id} to storage")

    def _merge_metadata(self, existing: Dict, new: Dict) -> Dict:
        """Recursively merge metadata dicts, preferring new truthy values.

        Keys missing from ``existing`` are copied from ``new`` as-is. For keys
        present in both, nested dicts are merged recursively; otherwise the
        new value replaces the old one only when it is truthy, so ``None``,
        empty strings/containers, ``0`` and ``False`` never overwrite stored
        data. ``existing`` itself is not modified at the top level.
        """
        result = existing.copy()

        for key, new_value in new.items():
            if key not in result:
                result[key] = new_value
            elif isinstance(new_value, dict) and isinstance(result[key], dict):
                result[key] = self._merge_metadata(result[key], new_value)
            elif new_value:
                result[key] = new_value

        return result
119
+
120
+
121
+ # EOF