scitex 2.14.0__py3-none-any.whl → 2.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300) hide show
  1. scitex/__init__.py +71 -17
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +210 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +244 -0
  16. scitex/_mcp_tools/template.py +24 -0
  17. scitex/_mcp_tools/writer.py +21 -204
  18. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  19. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  20. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  21. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  22. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  23. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  24. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  25. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  26. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  27. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  28. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
  29. scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
  30. scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
  31. scitex/audio/README.md +40 -36
  32. scitex/audio/__init__.py +129 -61
  33. scitex/audio/_branding.py +185 -0
  34. scitex/audio/_mcp/__init__.py +32 -0
  35. scitex/audio/_mcp/handlers.py +59 -6
  36. scitex/audio/_mcp/speak_handlers.py +238 -0
  37. scitex/audio/_relay.py +225 -0
  38. scitex/audio/_tts.py +18 -10
  39. scitex/audio/engines/base.py +17 -10
  40. scitex/audio/engines/elevenlabs_engine.py +7 -2
  41. scitex/audio/mcp_server.py +228 -75
  42. scitex/canvas/README.md +1 -1
  43. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  44. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  45. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  46. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  47. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  48. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  49. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  50. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  51. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  52. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  53. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  54. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  55. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  56. scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
  57. scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
  58. scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
  59. scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
  60. scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
  61. scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
  62. scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
  63. scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
  64. scitex/canvas/editor/flask_editor/_core.py +25 -1684
  65. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  66. scitex/cli/__init__.py +38 -43
  67. scitex/cli/audio.py +76 -27
  68. scitex/cli/capture.py +13 -20
  69. scitex/cli/introspect.py +481 -0
  70. scitex/cli/main.py +200 -109
  71. scitex/cli/mcp.py +60 -34
  72. scitex/cli/plt.py +357 -0
  73. scitex/cli/repro.py +15 -8
  74. scitex/cli/resource.py +15 -8
  75. scitex/cli/scholar/__init__.py +23 -8
  76. scitex/cli/scholar/_crossref_scitex.py +296 -0
  77. scitex/cli/scholar/_fetch.py +25 -3
  78. scitex/cli/social.py +314 -0
  79. scitex/cli/stats.py +15 -8
  80. scitex/cli/template.py +129 -12
  81. scitex/cli/tex.py +15 -8
  82. scitex/cli/writer.py +132 -8
  83. scitex/cloud/__init__.py +41 -2
  84. scitex/config/README.md +1 -1
  85. scitex/config/__init__.py +16 -2
  86. scitex/config/_env_registry.py +256 -0
  87. scitex/context/__init__.py +22 -0
  88. scitex/dev/__init__.py +20 -1
  89. scitex/diagram/__init__.py +42 -19
  90. scitex/diagram/mcp_server.py +13 -125
  91. scitex/gen/__init__.py +50 -14
  92. scitex/gen/_list_packages.py +4 -4
  93. scitex/introspect/__init__.py +82 -0
  94. scitex/introspect/_call_graph.py +303 -0
  95. scitex/introspect/_class_hierarchy.py +163 -0
  96. scitex/introspect/_core.py +41 -0
  97. scitex/introspect/_docstring.py +131 -0
  98. scitex/introspect/_examples.py +113 -0
  99. scitex/introspect/_imports.py +271 -0
  100. scitex/{gen/_inspect_module.py → introspect/_list_api.py} +43 -54
  101. scitex/introspect/_mcp/__init__.py +41 -0
  102. scitex/introspect/_mcp/handlers.py +233 -0
  103. scitex/introspect/_members.py +155 -0
  104. scitex/introspect/_resolve.py +89 -0
  105. scitex/introspect/_signature.py +131 -0
  106. scitex/introspect/_source.py +80 -0
  107. scitex/introspect/_type_hints.py +172 -0
  108. scitex/io/_save.py +1 -2
  109. scitex/io/bundle/README.md +1 -1
  110. scitex/logging/_formatters.py +19 -9
  111. scitex/mcp_server.py +98 -5
  112. scitex/os/__init__.py +4 -0
  113. scitex/{gen → os}/_check_host.py +4 -5
  114. scitex/plt/__init__.py +245 -550
  115. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  116. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  117. scitex/plt/gallery/README.md +1 -1
  118. scitex/plt/utils/_hitmap/__init__.py +82 -0
  119. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  120. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  121. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  122. scitex/plt/utils/_hitmap/_constants.py +40 -0
  123. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  124. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  125. scitex/plt/utils/_hitmap/_query.py +113 -0
  126. scitex/plt/utils/_hitmap.py +46 -1616
  127. scitex/plt/utils/_metadata/__init__.py +80 -0
  128. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  129. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  130. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  131. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  132. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  133. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  134. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  135. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  136. scitex/plt/utils/_metadata/_csv.py +416 -0
  137. scitex/plt/utils/_metadata/_detect.py +225 -0
  138. scitex/plt/utils/_metadata/_legend.py +127 -0
  139. scitex/plt/utils/_metadata/_rounding.py +117 -0
  140. scitex/plt/utils/_metadata/_verification.py +202 -0
  141. scitex/schema/README.md +1 -1
  142. scitex/scholar/__init__.py +8 -0
  143. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  144. scitex/scholar/core/Scholar.py +63 -1700
  145. scitex/scholar/core/_mixins/__init__.py +36 -0
  146. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  147. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  148. scitex/scholar/core/_mixins/_loaders.py +103 -0
  149. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  150. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  151. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  152. scitex/scholar/core/_mixins/_savers.py +69 -0
  153. scitex/scholar/core/_mixins/_search.py +103 -0
  154. scitex/scholar/core/_mixins/_services.py +88 -0
  155. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  156. scitex/scholar/crossref_scitex.py +367 -0
  157. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  158. scitex/scholar/examples/00_run_all.sh +120 -0
  159. scitex/scholar/jobs/_executors.py +27 -3
  160. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  161. scitex/scholar/pdf_download/_cli.py +154 -0
  162. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  163. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  164. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  165. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  166. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  167. scitex/scholar/pipelines/_single_steps.py +71 -36
  168. scitex/scholar/storage/_LibraryManager.py +97 -1695
  169. scitex/scholar/storage/_mixins/__init__.py +30 -0
  170. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  171. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  172. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  173. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  174. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  175. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  176. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  177. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  178. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  179. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  180. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  181. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  182. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  183. scitex/security/README.md +3 -3
  184. scitex/session/README.md +1 -1
  185. scitex/session/__init__.py +26 -7
  186. scitex/session/_decorator.py +1 -1
  187. scitex/sh/README.md +1 -1
  188. scitex/sh/__init__.py +7 -4
  189. scitex/social/__init__.py +155 -0
  190. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  191. scitex/stats/_mcp/_handlers/__init__.py +31 -0
  192. scitex/stats/_mcp/_handlers/_corrections.py +113 -0
  193. scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
  194. scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
  195. scitex/stats/_mcp/_handlers/_format.py +94 -0
  196. scitex/stats/_mcp/_handlers/_normality.py +110 -0
  197. scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
  198. scitex/stats/_mcp/_handlers/_power.py +247 -0
  199. scitex/stats/_mcp/_handlers/_recommend.py +102 -0
  200. scitex/stats/_mcp/_handlers/_run_test.py +279 -0
  201. scitex/stats/_mcp/_handlers/_stars.py +48 -0
  202. scitex/stats/_mcp/handlers.py +19 -1171
  203. scitex/stats/auto/_stat_style.py +175 -0
  204. scitex/stats/auto/_style_definitions.py +411 -0
  205. scitex/stats/auto/_styles.py +22 -620
  206. scitex/stats/descriptive/__init__.py +11 -8
  207. scitex/stats/descriptive/_ci.py +39 -0
  208. scitex/stats/power/_power.py +15 -4
  209. scitex/str/__init__.py +2 -1
  210. scitex/str/_title_case.py +63 -0
  211. scitex/template/README.md +1 -1
  212. scitex/template/__init__.py +25 -10
  213. scitex/template/_code_templates.py +147 -0
  214. scitex/template/_mcp/handlers.py +81 -0
  215. scitex/template/_mcp/tool_schemas.py +55 -0
  216. scitex/template/_templates/__init__.py +51 -0
  217. scitex/template/_templates/audio.py +233 -0
  218. scitex/template/_templates/canvas.py +312 -0
  219. scitex/template/_templates/capture.py +268 -0
  220. scitex/template/_templates/config.py +43 -0
  221. scitex/template/_templates/diagram.py +294 -0
  222. scitex/template/_templates/io.py +107 -0
  223. scitex/template/_templates/module.py +53 -0
  224. scitex/template/_templates/plt.py +202 -0
  225. scitex/template/_templates/scholar.py +267 -0
  226. scitex/template/_templates/session.py +130 -0
  227. scitex/template/_templates/session_minimal.py +43 -0
  228. scitex/template/_templates/session_plot.py +67 -0
  229. scitex/template/_templates/session_stats.py +77 -0
  230. scitex/template/_templates/stats.py +323 -0
  231. scitex/template/_templates/writer.py +296 -0
  232. scitex/template/clone_writer_directory.py +5 -5
  233. scitex/ui/_backends/_email.py +10 -2
  234. scitex/ui/_backends/_webhook.py +5 -1
  235. scitex/web/_search_pubmed.py +10 -6
  236. scitex/writer/README.md +1 -1
  237. scitex/writer/_mcp/handlers.py +11 -744
  238. scitex/writer/_mcp/tool_schemas.py +5 -335
  239. scitex-2.15.2.dist-info/METADATA +648 -0
  240. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/RECORD +246 -150
  241. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  242. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  243. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  244. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  245. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  246. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  247. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  248. scitex/diagram/_compile.py +0 -312
  249. scitex/diagram/_diagram.py +0 -355
  250. scitex/diagram/_mcp/__init__.py +0 -4
  251. scitex/diagram/_mcp/handlers.py +0 -400
  252. scitex/diagram/_mcp/tool_schemas.py +0 -157
  253. scitex/diagram/_presets.py +0 -173
  254. scitex/diagram/_schema.py +0 -182
  255. scitex/diagram/_split.py +0 -278
  256. scitex/gen/_ci.py +0 -12
  257. scitex/gen/_title_case.py +0 -89
  258. scitex/plt/_mcp/__init__.py +0 -4
  259. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  260. scitex/plt/_mcp/_handlers_figure.py +0 -195
  261. scitex/plt/_mcp/_handlers_plot.py +0 -252
  262. scitex/plt/_mcp/_handlers_style.py +0 -219
  263. scitex/plt/_mcp/handlers.py +0 -74
  264. scitex/plt/_mcp/tool_schemas.py +0 -497
  265. scitex/plt/mcp_server.py +0 -231
  266. scitex/scholar/data/.gitkeep +0 -0
  267. scitex/scholar/data/README.md +0 -44
  268. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  269. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  270. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  271. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  272. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  273. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  274. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  275. scitex/scholar/data/bib_files/pac.bib +0 -698
  276. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  277. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  278. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  279. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  280. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  281. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  282. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  283. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  284. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  285. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  286. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  287. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  288. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  289. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  290. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  291. scitex/scholar/data/impact_factor.db +0 -0
  292. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  293. scitex/scholar/examples/dev.py +0 -38
  294. scitex-2.14.0.dist-info/METADATA +0 -1238
  295. /scitex/{gen → context}/_detect_environment.py +0 -0
  296. /scitex/{gen → context}/_get_notebook_path.py +0 -0
  297. /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
  298. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/WHEEL +0 -0
  299. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/entry_points.txt +0 -0
  300. {scitex-2.14.0.dist-info → scitex-2.15.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,376 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_resolution.py
4
+
5
+ """
6
+ DOI resolution mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import re
13
+ from datetime import datetime
14
+ from typing import Any, Dict, List, Optional
15
+
16
+ from scitex import logging
17
+ from scitex.scholar.utils import TextNormalizer
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class ResolutionMixin:
    """Mixin providing DOI resolution methods.

    The methods below use several attributes that are not defined in this
    class and must be supplied by the host class: ``library_master_dir``,
    ``single_doi_resolver``, ``_call_path_manager_get_storage_paths``,
    ``_create_project_symlink`` and ``_create_bibtex_info_structure``.
    """

    def check_library_for_doi(
        self, title: str, year: Optional[int] = None
    ) -> Optional[str]:
        """Check if DOI already exists in master Scholar library.

        Scans every sub-directory of ``self.library_master_dir`` for a
        ``metadata.json`` whose title is similar to ``title`` (see
        ``_is_title_similar``) and whose year is compatible with ``year``
        (see ``_is_year_compatible``).

        Args:
            title: Title of the paper being looked up.
            year: Optional publication year; when falsy, no year filtering
                is applied.

        Returns:
            The stored DOI of the first matching entry, or None when nothing
            matches or the library cannot be scanned.
        """
        try:
            for paper_dir in self.library_master_dir.iterdir():
                if not paper_dir.is_dir():
                    continue

                metadata_file = paper_dir / "metadata.json"
                if not metadata_file.exists():
                    continue

                try:
                    with open(metadata_file, encoding="utf-8") as file_:
                        metadata = json.load(file_)

                    stored_doi = metadata.get("doi")
                    title_match = self._is_title_similar(
                        title, metadata.get("title", "")
                    )
                    year_match = self._is_year_compatible(
                        year, metadata.get("year")
                    )

                    if title_match and year_match and stored_doi:
                        logger.info(
                            f"DOI found in master Scholar library: {stored_doi} (paper_id: {paper_dir.name})"
                        )
                        return stored_doi

                except (
                    json.JSONDecodeError,
                    KeyError,
                    ValueError,
                    OSError,
                    AttributeError,
                    TypeError,
                ) as exc_:
                    # One unreadable or malformed metadata file (including a
                    # JSON payload that is not an object) must not abort the
                    # scan of the remaining entries.
                    logger.debug(
                        f"Error reading metadata from {metadata_file}: {exc_}"
                    )
                    continue

            return None

        except Exception as exc_:
            # Best-effort lookup: any failure to walk the library is treated
            # as "not found".
            logger.debug(f"Error checking master Scholar library: {exc_}")
            return None

    @staticmethod
    def _is_year_compatible(year: Any, stored_year: Any) -> bool:
        """Return True when ``stored_year`` does not contradict ``year``.

        A missing value on either side imposes no constraint. Two values
        that both convert to ``int`` match when they differ by at most one
        year; otherwise the values are compared for equality.
        """
        if not year or not stored_year:
            return True
        try:
            return abs(int(stored_year) - int(year)) <= 1
        except (TypeError, ValueError):
            return stored_year == year

    async def resolve_and_create_library_structure_async(
        self,
        papers: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
    ) -> Dict[str, Dict[str, Optional[str]]]:
        """Resolve DOIs and create full Scholar library structure with proper paths.

        For each paper with a title: resolves a DOI through
        ``self.single_doi_resolver``, writes ``metadata.json`` under the
        storage path returned by the path manager, creates the project
        symlink and the BibTeX info structure.

        Args:
            papers: Paper dicts; entries without a ``"title"`` are skipped.
            project: Project name forwarded to the symlink/BibTeX helpers.
            sources: Optional list of sources forwarded to the resolver.

        Returns:
            Mapping of paper title to a dict describing the created entry.
            Papers that raised during processing are logged and omitted.

        Raises:
            ValueError: If ``self.single_doi_resolver`` is falsy.
        """
        if not self.single_doi_resolver:
            raise ValueError("SingleDOIResolver is required for resolving DOIs")

        results = {}
        for paper in papers:
            title = paper.get("title")
            if not title:
                logger.warning(f"Skipping paper without title: {paper}")
                continue

            logger.info(f"Processing: {title[:50]}...")

            try:
                doi_result = await self.single_doi_resolver.metadata2doi_async(
                    title=title,
                    year=paper.get("year"),
                    authors=paper.get("authors"),
                    sources=sources,
                )

                enhanced_metadata = self._extract_enhanced_metadata(doi_result, paper)
                paper_info = {**paper, **enhanced_metadata}

                storage_paths = self._call_path_manager_get_storage_paths(
                    paper_info=paper_info, collection_name="MASTER"
                )
                paper_id = storage_paths["unique_id"]
                storage_path = storage_paths["storage_path"]
                readable_name = storage_paths["readable_name"]
                metadata_file = storage_path / "metadata.json"

                complete_metadata = self._create_complete_metadata(
                    paper, doi_result, paper_id, enhanced_metadata
                )

                with open(metadata_file, "w", encoding="utf-8") as file_:
                    json.dump(complete_metadata, file_, indent=2)

                logger.success(
                    f"Saved metadata.json for {paper_id} ({len(complete_metadata)} fields)"
                )

                project_symlink_path = self._create_project_symlink(
                    master_storage_path=storage_path,
                    project=project,
                    readable_name=readable_name,
                )

                # NOTE: ``_source_filename`` is instance state set by
                # ``resolve_and_create_library_structure_with_source_async``
                # and stays set after that call returns.
                bibtex_source_filename = getattr(self, "_source_filename", "papers")
                info_dir = self._create_bibtex_info_structure(
                    project=project,
                    paper_info=paper_info,
                    complete_metadata=complete_metadata,
                    bibtex_source_filename=bibtex_source_filename,
                )

                results[title] = {
                    "scitex_id": paper_id,
                    "scholar_id": paper_id,
                    "doi": complete_metadata.get("doi"),
                    "master_storage_path": str(storage_path),
                    "project_symlink_path": str(project_symlink_path)
                    if project_symlink_path
                    else None,
                    "readable_name": readable_name,
                    "metadata_file": str(metadata_file),
                    "info_dir": str(info_dir) if info_dir else None,
                }

                logger.info(f"Created library entry: {paper_id}")
                if complete_metadata.get("doi"):
                    logger.info(f" DOI: {complete_metadata['doi']}")
                logger.info(f" Storage: {storage_path}")

            except Exception as exc_:
                # Per-paper isolation: a failure is logged and the batch
                # continues with the next paper.
                logger.error(f"Error processing '{title[:30]}...': {exc_}")

        logger.success(
            f"Created Scholar library entries for {len(results)}/{len(papers)} papers"
        )
        return results

    async def resolve_and_create_library_structure_with_source_async(
        self,
        papers: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
        bibtex_source_filename: str = "papers",
    ) -> Dict[str, Dict[str, Optional[str]]]:
        """Enhanced version that passes source filename for BibTeX structure.

        Stores ``bibtex_source_filename`` on the instance as
        ``_source_filename`` and delegates to
        ``resolve_and_create_library_structure_async``.

        NOTE(review): the attribute is not reset afterwards, so later calls
        to the plain method on the same instance reuse it.
        """
        self._source_filename = bibtex_source_filename
        return await self.resolve_and_create_library_structure_async(
            papers=papers, project=project, sources=sources
        )

    def _extract_enhanced_metadata(
        self, doi_result: Optional[Dict], paper: Dict
    ) -> Dict[str, Any]:
        """Extract enhanced metadata from DOI resolution result.

        Each field is taken from ``doi_result["metadata"]`` first, then from
        the top level of ``doi_result``. ``journal``, ``authors``, ``year``
        and ``title`` additionally fall back to the input ``paper``.

        Returns:
            An empty dict when ``doi_result`` is falsy or not a dict;
            otherwise a dict with a fixed set of keys whose values may be
            None.
        """
        enhanced = {}
        if not (doi_result and isinstance(doi_result, dict)):
            return enhanced

        # The key may be present with an explicit null, so ``get`` with a
        # default is not enough.
        metadata_source = doi_result.get("metadata") or {}
        if not isinstance(metadata_source, dict):
            metadata_source = {}

        def from_resolver(key):
            return metadata_source.get(key) or doi_result.get(key)

        enhanced["doi"] = doi_result.get("doi")
        for key in ("journal", "authors", "year", "title"):
            enhanced[key] = from_resolver(key) or paper.get(key)
        for key in (
            "abstract",
            "publisher",
            "volume",
            "issue",
            "pages",
            "issn",
            "short_journal",
        ):
            enhanced[key] = from_resolver(key)

        if doi_result.get("doi"):
            logger.success(
                f"Enhanced metadata from DOI source: {dict(metadata_source)}"
            )

        return enhanced

    @staticmethod
    def _field_source(
        enhanced_value: Any,
        paper_value: Any,
        doi_source_value: Optional[str],
        default: Optional[str] = None,
    ) -> Optional[str]:
        """Return the provenance label for one metadata field.

        The resolver source is reported only when the resolver actually
        supplied a value that differs from the caller's. Otherwise the
        value is the caller's own: ``"manual"`` if present, else
        ``default``.
        """
        if enhanced_value and enhanced_value != paper_value:
            return doi_source_value
        return "manual" if paper_value else default

    def _create_complete_metadata(
        self,
        paper: Dict,
        doi_result: Optional[Dict],
        paper_id: str,
        enhanced_metadata: Dict,
    ) -> Dict[str, Any]:
        """Create complete metadata dictionary with source tracking.

        Args:
            paper: Original paper dict supplied by the caller.
            doi_result: Resolver result, or None when resolution failed.
            paper_id: Identifier stored as ``scitex_id``.
            enhanced_metadata: Output of ``_extract_enhanced_metadata``.

        Returns:
            Metadata dict holding each core field with a ``*_source``
            companion, the DOI outcome, the standard fields added by
            ``_add_standard_fields`` and the storage paths obtained from the
            path manager.
        """
        raw_title = enhanced_metadata.get("title") or paper.get("title")
        clean_title = TextNormalizer.clean_metadata_text(raw_title) if raw_title else ""

        clean_abstract = None
        if enhanced_metadata.get("abstract"):
            clean_abstract = TextNormalizer.clean_metadata_text(
                enhanced_metadata["abstract"]
            )

        doi_source_value = self._get_doi_source_value(doi_result)

        def source_of(field, default=None):
            return self._field_source(
                enhanced_metadata.get(field),
                paper.get(field),
                doi_source_value,
                default,
            )

        complete_metadata = {
            "title": clean_title,
            "title_source": source_of("title", default="manual"),
            "authors": enhanced_metadata.get("authors") or paper.get("authors"),
            "authors_source": source_of("authors"),
            "year": enhanced_metadata.get("year") or paper.get("year"),
            "year_source": source_of("year"),
            "journal": enhanced_metadata.get("journal") or paper.get("journal"),
            "journal_source": source_of("journal"),
            "abstract": clean_abstract,
            "abstract_source": doi_source_value
            if enhanced_metadata.get("abstract")
            else None,
            "scitex_id": paper_id,
            "created_at": datetime.now().isoformat(),
            "created_by": "SciTeX Scholar",
        }

        if doi_result and isinstance(doi_result, dict):
            safe_fields = [
                "publisher",
                "volume",
                "issue",
                "pages",
                "issn",
                "short_journal",
            ]
            for field in safe_fields:
                value = enhanced_metadata.get(field)
                if value is not None:
                    complete_metadata[field] = value
                    complete_metadata[f"{field}_source"] = (
                        doi_source_value or "unknown_api"
                    )

        if doi_result and doi_result.get("doi"):
            complete_metadata.update(
                {"doi": doi_result["doi"], "doi_source": doi_source_value}
            )
            logger.success(f"DOI resolved for {paper_id}: {doi_result['doi']}")
        else:
            complete_metadata.update(
                {"doi": None, "doi_source": None, "doi_resolution_failed": True}
            )
            # ``or ''`` guards against a title key that is present but None.
            logger.warning(
                f"DOI resolution failed for {paper_id}: {(paper.get('title') or '')[:40]}..."
            )

        self._add_standard_fields(complete_metadata)

        storage_paths = self._call_path_manager_get_storage_paths(
            paper_info={**paper, **enhanced_metadata}, collection_name="MASTER"
        )
        storage_path = storage_paths["storage_path"]

        complete_metadata.update(
            {
                "master_storage_path": str(storage_path),
                "readable_name": storage_paths["readable_name"],
                "metadata_file": str(storage_path / "metadata.json"),
            }
        )

        return complete_metadata

    def _get_doi_source_value(self, doi_result: Optional[Dict]) -> Optional[str]:
        """Get normalized DOI source value.

        Returns None when there is no result or no ``"source"`` entry, a
        canonical label when the source contains a known provider name
        (case-insensitive), and the raw source string otherwise.
        """
        if not doi_result or not doi_result.get("source"):
            return None

        source = doi_result["source"]
        lowered = source.lower()
        # Checked in order; the first matching substring wins.
        known_sources = (
            ("crossref", "crossref"),
            ("semantic", "semantic_scholar"),
            ("pubmed", "pubmed"),
            ("openalex", "openalex"),
        )
        for needle, canonical in known_sources:
            if needle in lowered:
                return canonical
        return source

    def _add_standard_fields(self, complete_metadata: Dict) -> None:
        """Add standard fields with None defaults.

        Mutates ``complete_metadata`` in place: every standard field that is
        absent or None is set to None, and the list of such fields is
        logged.
        """
        standard_fields = (
            "keywords",
            "references",
            "venue",
            "publisher",
            "volume",
            "issue",
            "pages",
            "issn",
            "short_journal",
        )

        missing_fields = []
        for field in standard_fields:
            if complete_metadata.get(field) is None:
                complete_metadata[field] = None
                missing_fields.append(field)

        if missing_fields:
            logger.info(
                f"Missing fields for future enhancement: {', '.join(missing_fields)}"
            )

    def _is_title_similar(
        self, title1: str, title2: str, threshold: float = 0.7
    ) -> bool:
        """Check if two titles are similar enough to be considered the same paper.

        Titles are lower-cased, punctuation is replaced by spaces, and the
        resulting word sets are compared: similarity is the size of the
        intersection divided by the size of the union.

        Returns:
            True when the similarity is at least ``threshold``; False when
            either title is empty or has no words after normalization.
        """
        if not title1 or not title2:
            return False

        def normalize_title(title: str) -> str:
            title = title.lower()
            title = re.sub(r"[^\w\s]", " ", title)
            title = re.sub(r"\s+", " ", title)
            return title.strip()

        words1 = set(normalize_title(title1).split())
        words2 = set(normalize_title(title2).split())

        if not words1 or not words2:
            return False

        similarity = len(words1 & words2) / len(words1 | words2)
        return similarity >= threshold
374
+
375
+
376
+ # EOF
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_storage_helpers.py
4
+
5
+ """
6
+ Storage helper mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from datetime import datetime
13
+ from typing import TYPE_CHECKING, Dict, Optional
14
+
15
+ from scitex import logging
16
+
17
+ if TYPE_CHECKING:
18
+ from scitex.scholar.core.Paper import Paper
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class StorageHelpersMixin:
    """Mixin providing storage helper methods.

    Every method resolves files under ``self.library_master_dir / paper_id``;
    ``library_master_dir`` is not defined here and must be supplied by the
    host class.
    """

    @staticmethod
    def _read_json(path):
        """Load a JSON file, preferring UTF-8.

        Falls back to the platform default encoding so that files written
        before UTF-8 was enforced can still be read.
        """
        try:
            with open(path, encoding="utf-8") as f:
                return json.load(f)
        except UnicodeDecodeError:
            with open(path) as f:
                return json.load(f)

    def has_metadata(self, paper_id: str) -> bool:
        """Check if metadata.json exists for paper."""
        metadata_file = self.library_master_dir / paper_id / "metadata.json"
        return metadata_file.exists()

    def has_urls(self, paper_id: str) -> bool:
        """Check if PDF URLs exist in metadata.

        Looks at ``metadata.url.pdfs`` inside ``metadata.json``. Returns
        False when the file is missing, unreadable, or has no such list.
        """
        if not self.has_metadata(paper_id):
            return False

        metadata_file = self.library_master_dir / paper_id / "metadata.json"
        try:
            data = self._read_json(metadata_file)
            urls = data.get("metadata", {}).get("url", {}).get("pdfs", [])
            return len(urls) > 0
        except Exception:
            # Best-effort probe: any malformed payload counts as "no URLs".
            return False

    def has_pdf(self, paper_id: str) -> bool:
        """Check if PDF file exists in storage."""
        paper_dir = self.library_master_dir / paper_id
        if not paper_dir.exists():
            return False

        # Stop at the first match instead of listing every PDF.
        return any(paper_dir.glob("*.pdf"))

    def load_paper_from_id(self, paper_id: str) -> Optional[Paper]:
        """Load Paper object from storage by ID.

        Returns:
            The ``Paper`` built via ``Paper.from_dict`` from ``metadata.json``,
            or None when the file is missing or cannot be loaded (the error
            is logged).
        """
        # Imported here rather than at module level; the module-level import
        # is guarded by TYPE_CHECKING.
        from scitex.scholar.core.Paper import Paper

        metadata_file = self.library_master_dir / paper_id / "metadata.json"

        if not metadata_file.exists():
            return None

        try:
            data = self._read_json(metadata_file)
            return Paper.from_dict(data)

        except Exception as e:
            logger.error(f"Failed to load paper {paper_id}: {e}")
            return None

    def save_paper_incremental(self, paper_id: str, paper: Paper) -> None:
        """Save Paper object to storage (incremental update).

        Merges ``paper.model_dump()`` into any existing ``metadata.json``
        (see ``_merge_metadata``), stamps ``container.updated_at`` with the
        current time and writes the result back as UTF-8 JSON.
        """
        storage_path = self.library_master_dir / paper_id
        storage_path.mkdir(parents=True, exist_ok=True)

        metadata_file = storage_path / "metadata.json"

        existing_data = {}
        if metadata_file.exists():
            try:
                existing_data = self._read_json(metadata_file)
            except Exception as e:
                # Still start from an empty dict, but leave a trace: the
                # unreadable file is about to be overwritten.
                logger.warning(
                    f"Could not read existing metadata for {paper_id}, overwriting: {e}"
                )
                existing_data = {}
            if not isinstance(existing_data, dict):
                logger.warning(
                    f"Existing metadata for {paper_id} is not a JSON object, overwriting"
                )
                existing_data = {}

        new_data = paper.model_dump()
        merged_data = self._merge_metadata(existing_data, new_data)

        if "container" not in merged_data:
            merged_data["container"] = {}
        merged_data["container"]["updated_at"] = datetime.now().isoformat()

        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be pinned rather than left to the platform default.
        with open(metadata_file, "w", encoding="utf-8") as f:
            json.dump(merged_data, f, indent=2, ensure_ascii=False)

        logger.debug(f"Saved paper {paper_id} to storage")

    def _merge_metadata(self, existing: Dict, new: Dict) -> Dict:
        """Recursively merge metadata dicts, preferring new truthy values.

        Rules per key of ``new``:
        - key absent from ``existing``: the new value is taken as is;
        - both values are dicts: merged recursively;
        - otherwise the new value replaces the old one only when it is
          truthy, so None, empty strings/containers, 0 and False never
          overwrite an existing value.

        ``existing`` is shallow-copied; it is not modified.
        """
        result = existing.copy()

        for key, new_value in new.items():
            if key not in result:
                result[key] = new_value
            elif isinstance(new_value, dict) and isinstance(result[key], dict):
                result[key] = self._merge_metadata(result[key], new_value)
            elif new_value:
                result[key] = new_value

        return result
119
+
120
+
121
+ # EOF