scitex 2.14.0__py3-none-any.whl → 2.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. scitex/__init__.py +47 -0
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +191 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +244 -0
  16. scitex/_mcp_tools/writer.py +21 -204
  17. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  18. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  19. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  20. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  21. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  22. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  23. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  24. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  25. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  26. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  27. scitex/audio/README.md +40 -36
  28. scitex/audio/__init__.py +127 -59
  29. scitex/audio/_branding.py +185 -0
  30. scitex/audio/_mcp/__init__.py +32 -0
  31. scitex/audio/_mcp/handlers.py +59 -6
  32. scitex/audio/_mcp/speak_handlers.py +238 -0
  33. scitex/audio/_relay.py +225 -0
  34. scitex/audio/engines/elevenlabs_engine.py +6 -1
  35. scitex/audio/mcp_server.py +228 -75
  36. scitex/canvas/README.md +1 -1
  37. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  38. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  39. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  40. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  41. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  42. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  43. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  44. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  45. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  46. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  47. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  48. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  49. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  50. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  51. scitex/cli/__init__.py +38 -43
  52. scitex/cli/audio.py +76 -27
  53. scitex/cli/capture.py +13 -20
  54. scitex/cli/introspect.py +443 -0
  55. scitex/cli/main.py +198 -109
  56. scitex/cli/mcp.py +60 -34
  57. scitex/cli/scholar/__init__.py +8 -0
  58. scitex/cli/scholar/_crossref_scitex.py +296 -0
  59. scitex/cli/scholar/_fetch.py +25 -3
  60. scitex/cli/social.py +314 -0
  61. scitex/cli/writer.py +117 -0
  62. scitex/config/README.md +1 -1
  63. scitex/config/__init__.py +16 -2
  64. scitex/config/_env_registry.py +191 -0
  65. scitex/diagram/__init__.py +42 -19
  66. scitex/diagram/mcp_server.py +13 -125
  67. scitex/introspect/__init__.py +75 -0
  68. scitex/introspect/_call_graph.py +303 -0
  69. scitex/introspect/_class_hierarchy.py +163 -0
  70. scitex/introspect/_core.py +42 -0
  71. scitex/introspect/_docstring.py +131 -0
  72. scitex/introspect/_examples.py +113 -0
  73. scitex/introspect/_imports.py +271 -0
  74. scitex/introspect/_mcp/__init__.py +37 -0
  75. scitex/introspect/_mcp/handlers.py +208 -0
  76. scitex/introspect/_members.py +151 -0
  77. scitex/introspect/_resolve.py +89 -0
  78. scitex/introspect/_signature.py +131 -0
  79. scitex/introspect/_source.py +80 -0
  80. scitex/introspect/_type_hints.py +172 -0
  81. scitex/io/bundle/README.md +1 -1
  82. scitex/mcp_server.py +98 -5
  83. scitex/plt/__init__.py +248 -550
  84. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  85. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  86. scitex/plt/gallery/README.md +1 -1
  87. scitex/plt/utils/_hitmap/__init__.py +82 -0
  88. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  89. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  90. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  91. scitex/plt/utils/_hitmap/_constants.py +40 -0
  92. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  93. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  94. scitex/plt/utils/_hitmap/_query.py +113 -0
  95. scitex/plt/utils/_hitmap.py +46 -1616
  96. scitex/plt/utils/_metadata/__init__.py +80 -0
  97. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  98. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  99. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  100. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  101. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  102. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  103. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  104. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  105. scitex/plt/utils/_metadata/_csv.py +416 -0
  106. scitex/plt/utils/_metadata/_detect.py +225 -0
  107. scitex/plt/utils/_metadata/_legend.py +127 -0
  108. scitex/plt/utils/_metadata/_rounding.py +117 -0
  109. scitex/plt/utils/_metadata/_verification.py +202 -0
  110. scitex/schema/README.md +1 -1
  111. scitex/scholar/__init__.py +8 -0
  112. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  113. scitex/scholar/core/Scholar.py +63 -1700
  114. scitex/scholar/core/_mixins/__init__.py +36 -0
  115. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  116. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  117. scitex/scholar/core/_mixins/_loaders.py +103 -0
  118. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  119. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  120. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  121. scitex/scholar/core/_mixins/_savers.py +69 -0
  122. scitex/scholar/core/_mixins/_search.py +103 -0
  123. scitex/scholar/core/_mixins/_services.py +88 -0
  124. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  125. scitex/scholar/crossref_scitex.py +367 -0
  126. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  127. scitex/scholar/examples/00_run_all.sh +120 -0
  128. scitex/scholar/jobs/_executors.py +27 -3
  129. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  130. scitex/scholar/pdf_download/_cli.py +154 -0
  131. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  132. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  133. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  134. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  135. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  136. scitex/scholar/pipelines/_single_steps.py +71 -36
  137. scitex/scholar/storage/_LibraryManager.py +97 -1695
  138. scitex/scholar/storage/_mixins/__init__.py +30 -0
  139. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  140. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  141. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  142. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  143. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  144. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  145. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  146. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  147. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  148. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  149. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  150. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  151. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  152. scitex/security/README.md +3 -3
  153. scitex/session/README.md +1 -1
  154. scitex/sh/README.md +1 -1
  155. scitex/social/__init__.py +153 -0
  156. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  157. scitex/template/README.md +1 -1
  158. scitex/template/clone_writer_directory.py +5 -5
  159. scitex/writer/README.md +1 -1
  160. scitex/writer/_mcp/handlers.py +11 -744
  161. scitex/writer/_mcp/tool_schemas.py +5 -335
  162. scitex-2.15.1.dist-info/METADATA +648 -0
  163. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/RECORD +166 -111
  164. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  165. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  166. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  167. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  168. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  169. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  170. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  171. scitex/diagram/_compile.py +0 -312
  172. scitex/diagram/_diagram.py +0 -355
  173. scitex/diagram/_mcp/__init__.py +0 -4
  174. scitex/diagram/_mcp/handlers.py +0 -400
  175. scitex/diagram/_mcp/tool_schemas.py +0 -157
  176. scitex/diagram/_presets.py +0 -173
  177. scitex/diagram/_schema.py +0 -182
  178. scitex/diagram/_split.py +0 -278
  179. scitex/plt/_mcp/__init__.py +0 -4
  180. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  181. scitex/plt/_mcp/_handlers_figure.py +0 -195
  182. scitex/plt/_mcp/_handlers_plot.py +0 -252
  183. scitex/plt/_mcp/_handlers_style.py +0 -219
  184. scitex/plt/_mcp/handlers.py +0 -74
  185. scitex/plt/_mcp/tool_schemas.py +0 -497
  186. scitex/plt/mcp_server.py +0 -231
  187. scitex/scholar/data/.gitkeep +0 -0
  188. scitex/scholar/data/README.md +0 -44
  189. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  190. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  191. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  192. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  193. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  194. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  195. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  196. scitex/scholar/data/bib_files/pac.bib +0 -698
  197. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  198. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  199. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  200. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  201. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  202. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  203. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  204. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  205. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  206. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  207. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  208. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  209. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  210. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  211. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  212. scitex/scholar/data/impact_factor.db +0 -0
  213. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  214. scitex/scholar/examples/dev.py +0 -38
  215. scitex-2.14.0.dist-info/METADATA +0 -1238
  216. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/WHEEL +0 -0
  217. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/entry_points.txt +0 -0
  218. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/__init__.py
4
+
5
+ """
6
+ Mixin classes for LibraryManager.
7
+
8
+ Each mixin provides a specific set of methods for the manager class.
9
+ """
10
+
11
+ from ._bibtex_handlers import BibtexHandlersMixin
12
+ from ._library_operations import LibraryOperationsMixin
13
+ from ._metadata_conversion import MetadataConversionMixin
14
+ from ._paper_saving import PaperSavingMixin
15
+ from ._resolution import ResolutionMixin
16
+ from ._storage_helpers import StorageHelpersMixin
17
+ from ._symlink_handlers import SymlinkHandlersMixin
18
+
19
# Names exported by this package: the seven mixin classes imported above
# from the sibling ``_*.py`` modules.
__all__ = [
    "StorageHelpersMixin",
    "MetadataConversionMixin",
    "PaperSavingMixin",
    "ResolutionMixin",
    "SymlinkHandlersMixin",
    "BibtexHandlersMixin",
    "LibraryOperationsMixin",
]
28
+
29
+
30
+ # EOF
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_bibtex_handlers.py
4
+
5
+ """
6
+ BibTeX handling mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from pathlib import Path
13
+ from typing import Any, Dict, Optional
14
+
15
+ from scitex import logging
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class BibtexHandlersMixin:
    """Mixin providing BibTeX handling methods.

    The host class must expose ``self.config.path_manager`` with a
    ``get_library_project_dir(project)`` method; it is used to locate the
    project directory under which the BibTeX files are written.
    """

    # Mapping from metadata keys to the BibTeX field names they are emitted as.
    # Iteration order defines the order of fields in the generated entry.
    _BIBTEX_FIELD_MAP = (
        ("title", "title"),
        ("authors", "author"),
        ("year", "year"),
        ("journal", "journal"),
        ("doi", "doi"),
        ("volume", "volume"),
        ("issue", "number"),
        ("pages", "pages"),
        ("publisher", "publisher"),
        ("booktitle", "booktitle"),
        ("abstract", "abstract"),
    )

    @staticmethod
    def _first_author_token(authors: Any) -> str:
        """Return the lower-cased last whitespace-separated token of the first author.

        ``authors`` may be a list (its first element is used) or a single
        string (the whole string is used). Any other type, an empty value, or
        a blank/whitespace-only name yields ``"unknown"`` instead of raising
        ``IndexError``.
        """
        if isinstance(authors, list):
            candidate = str(authors[0]) if authors else ""
        elif isinstance(authors, str):
            candidate = authors
        else:
            candidate = ""
        tokens = candidate.split()
        return tokens[-1].lower() if tokens else "unknown"

    @staticmethod
    def _build_entry_key(metadata: Dict[str, Any]) -> str:
        """Build the ``<first_author><year>`` citation key for ``metadata``.

        A missing or ``None`` year is rendered as ``"unknown"`` so the key
        never contains the literal text ``None``.
        """
        first_author = BibtexHandlersMixin._first_author_token(
            metadata.get("authors")
        )
        year = metadata.get("year")
        if year is None:
            year = "unknown"
        return f"{first_author}{year}"

    def _create_bibtex_info_structure(
        self,
        project: str,
        paper_info: Dict[str, Any],
        complete_metadata: Dict[str, Any],
        bibtex_source_filename: str = "papers",
    ) -> Optional[Path]:
        """Append a BibTeX entry under ``<project>/info/<name>_bib/<name>.bib``.

        Also creates an ``unresolved`` sub-directory and, when the metadata
        carries no DOI, writes ``unresolved/<entry_key>.json`` describing the
        paper.

        Args:
            project: Project name passed to ``get_library_project_dir``.
            paper_info: Accepted for interface compatibility; not read here.
            complete_metadata: Flat metadata dict used to build the entry.
            bibtex_source_filename: Stem used for both the ``*_bib``
                directory and the ``.bib`` file.

        Returns:
            The ``info/<name>_bib`` directory on success, or ``None`` if any
            step failed (the failure is logged as a warning, not raised).
        """
        try:
            project_dir = self.config.path_manager.get_library_project_dir(project)
            info_dir = project_dir / "info" / f"{bibtex_source_filename}_bib"
            info_dir.mkdir(parents=True, exist_ok=True)

            bibtex_file = info_dir / f"{bibtex_source_filename}.bib"
            unresolved_dir = info_dir / "unresolved"
            unresolved_dir.mkdir(parents=True, exist_ok=True)

            entry_key = self._build_entry_key(complete_metadata)
            bibtex_entry = self._generate_bibtex_entry(complete_metadata, entry_key)

            # Entries after the first are separated from the previous one by a
            # newline; append mode creates the file when it does not exist.
            separator = "\n" if bibtex_file.exists() else ""
            with open(bibtex_file, "a", encoding="utf-8") as file_:
                file_.write(f"{separator}{bibtex_entry}")

            if not complete_metadata.get("doi"):
                unresolved_file = unresolved_dir / f"{entry_key}.json"
                unresolved_data = {
                    "title": complete_metadata.get("title", ""),
                    "authors": complete_metadata.get("authors", []),
                    "year": complete_metadata.get("year", ""),
                    "journal": complete_metadata.get("journal", ""),
                    "scholar_id": complete_metadata.get("scholar_id", ""),
                    "resolution_failed": True,
                    "timestamp": complete_metadata.get("created_at", ""),
                }
                with open(unresolved_file, "w", encoding="utf-8") as file_:
                    json.dump(unresolved_data, file_, indent=2)
                logger.info(f"Added unresolved entry: {unresolved_file}")

            logger.success(f"Updated BibTeX info structure: {bibtex_file}")
            return info_dir

        except Exception as exc_:
            # Best-effort: BibTeX bookkeeping must not abort the caller.
            logger.warning(f"Failed to create BibTeX info structure: {exc_}")
            return None

    def _generate_bibtex_entry(self, metadata: Dict[str, Any], entry_key: str) -> str:
        """Render ``metadata`` as a single BibTeX entry string.

        The entry type is ``article`` when a journal is present,
        ``inproceedings`` when only a booktitle is present, ``book`` when only
        a publisher is present, and ``article`` otherwise. List values are
        joined with ``" and "``; braces in values are backslash-escaped.
        For every emitted field, a ``% <field>_source = ...`` line is added
        when ``<meta_field>_source`` exists in ``metadata``.

        NOTE(review): the ``%``-prefixed lines are written inside the entry
        braces; strict BibTeX parsers may not treat them as comments --
        confirm with the consumers of these files before relying on them.
        """
        if metadata.get("journal"):
            entry_type = "article"
        elif metadata.get("booktitle"):
            entry_type = "inproceedings"
        elif metadata.get("publisher"):
            entry_type = "book"
        else:
            entry_type = "article"

        lines = [f"@{entry_type}{{{entry_key},\n"]

        for meta_field, bibtex_field in self_field_map():
            value = metadata.get(meta_field)
            if not value:
                continue
            if isinstance(value, list):
                value = " and ".join(str(val_) for val_ in value)
            value_escaped = str(value).replace("{", "\\{").replace("}", "\\}")
            lines.append(f" {bibtex_field} = {{{value_escaped}}},\n")

            source_field = f"{meta_field}_source"
            if source_field in metadata:
                lines.append(
                    f" % {bibtex_field}_source = {metadata[source_field]}\n"
                )

        lines.append(f" % scholar_id = {metadata.get('scholar_id', 'unknown')},\n")
        lines.append(f" % created_at = {metadata.get('created_at', 'unknown')},\n")
        lines.append(f" % created_by = {metadata.get('created_by', 'unknown')},\n")
        lines.append("}\n")

        return "".join(lines)


def self_field_map():
    """Return the (metadata key, BibTeX field) pairs in emission order."""
    return BibtexHandlersMixin._BIBTEX_FIELD_MAP
126
+
127
+
128
+ # EOF
@@ -0,0 +1,218 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_library_operations.py
4
+
5
+ """
6
+ Library operations mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+ import json
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ from scitex import logging
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class LibraryOperationsMixin:
    """Mixin providing library operation methods.

    The host class must provide ``self.config.path_manager`` (with
    ``library_dir`` and ``_ensure_directory``), ``self.single_doi_resolver``
    and the coroutine ``resolve_and_create_library_structure_async``; all of
    them are used below but defined elsewhere.
    """

    def update_library_metadata(
        self,
        paper_id: str,
        project: str,
        doi: str,
        metadata: Dict[str, Any],
        create_structure: bool = True,
    ) -> bool:
        """Merge ``metadata`` and the resolved DOI into ``metadata.json``.

        Existing keys in ``<library>/<project>/<paper_id>/metadata.json`` are
        kept unless overridden by ``metadata``; ``doi``, ``doi_resolved_at``
        and ``doi_source`` are always (re)written.

        Args:
            paper_id: Directory name of the paper inside the project.
            project: Project directory name inside the library.
            doi: Resolved DOI to store.
            metadata: Extra fields merged over the existing metadata.
            create_structure: Create the paper directory when it is missing.

        Returns:
            ``True`` when the file was written, ``False`` on any error (the
            error is logged, not raised).
        """
        try:
            library_path = self.config.path_manager.library_dir
            paper_dir = library_path / project / paper_id
            metadata_file = paper_dir / "metadata.json"

            if create_structure and not paper_dir.exists():
                self.config.path_manager._ensure_directory(paper_dir)
                logger.info(f"Created Scholar library structure: {paper_dir}")

            existing_metadata = {}
            if metadata_file.exists():
                try:
                    with open(metadata_file, encoding="utf-8") as file_:
                        existing_metadata = json.load(file_)
                except Exception as exc_:
                    # An unreadable file is treated as "no previous metadata".
                    logger.warning(f"Error loading existing metadata: {exc_}")

            updated_metadata = {
                **existing_metadata,
                **metadata,
                "doi": doi,
                "doi_resolved_at": datetime.now().isoformat(),
                "doi_source": "batch_doi_resolver",
            }

            with open(metadata_file, "w", encoding="utf-8") as file_:
                json.dump(updated_metadata, file_, indent=2)

            logger.success(f"Updated metadata for {paper_id}: DOI {doi}")
            return True

        except Exception as exc_:
            logger.error(f"Error updating library metadata for {paper_id}: {exc_}")
            return False

    def create_writer_directory_structure(self, paper_id: str, project: str) -> Path:
        """Create the paper directory with its standard sub-directories.

        Ensures ``<library>/<project>/<paper_id>`` exists together with the
        ``attachments`` and ``screenshots`` sub-directories.

        Returns:
            The paper directory path.
        """
        library_path = self.config.path_manager.library_dir
        paper_dir = library_path / project / paper_id

        self.config.path_manager._ensure_directory(paper_dir)

        for subdir in ("attachments", "screenshots"):
            self.config.path_manager._ensure_directory(paper_dir / subdir)

        logger.info(f"Created Scholar library structure: {paper_dir}")
        return paper_dir

    def validate_library_structure(self, project: str) -> Dict[str, Any]:
        """Report on the on-disk structure of ``project``.

        Sub-directories whose name is exactly 8 characters long are counted
        as papers; each one lacking a ``metadata.json`` is listed in
        ``missing_metadata`` and produces a warning.

        Returns:
            A dict with keys ``valid``, ``warnings``, ``errors``,
            ``paper_count`` and ``missing_metadata``. ``valid`` is ``False``
            only when the project directory does not exist.
        """
        validation: Dict[str, Any] = {
            "valid": True,
            "warnings": [],
            "errors": [],
            "paper_count": 0,
            "missing_metadata": [],
        }

        library_path = self.config.path_manager.library_dir
        project_dir = library_path / project

        if not project_dir.exists():
            validation["errors"].append(
                f"Project directory does not exist: {project_dir}"
            )
            validation["valid"] = False
            return validation

        for paper_dir in project_dir.iterdir():
            if not (paper_dir.is_dir() and len(paper_dir.name) == 8):
                continue
            validation["paper_count"] += 1

            if not (paper_dir / "metadata.json").exists():
                validation["missing_metadata"].append(paper_dir.name)
                validation["warnings"].append(
                    f"Missing metadata.json: {paper_dir.name}"
                )

        return validation

    def resolve_and_update_library(
        self,
        papers_with_ids: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
    ) -> Dict[str, str]:
        """Resolve DOIs and update Scholar library metadata.json files.

        Papers without ``paper_id`` or ``title`` are skipped with a warning.
        Each remaining paper is resolved through
        ``self.single_doi_resolver.metadata2doi_async`` (driven with
        ``asyncio.run``) and, when a DOI is found, stored via
        ``update_library_metadata``. Per-paper errors are logged and do not
        stop the batch.

        Returns:
            Mapping of ``paper_id`` to DOI for every paper that was resolved
            and successfully written.

        Raises:
            ValueError: If ``self.single_doi_resolver`` is not set.
        """
        if not self.single_doi_resolver:
            raise ValueError("SingleDOIResolver is required for resolving DOIs")

        results: Dict[str, str] = {}
        for paper in papers_with_ids:
            paper_id = paper.get("paper_id")
            if not paper_id:
                logger.warning(
                    f"Skipping paper without paper_id: {paper.get('title', 'Unknown')}"
                )
                continue

            title = paper.get("title")
            if not title:
                logger.warning(f"Skipping paper {paper_id} without title")
                continue

            logger.info(f"Resolving DOI for {paper_id}: {title[:50]}...")

            try:
                result = asyncio.run(
                    self.single_doi_resolver.metadata2doi_async(
                        title=title,
                        year=paper.get("year"),
                        authors=paper.get("authors"),
                        sources=sources,
                    )
                )

                if result and isinstance(result, dict) and result.get("doi"):
                    doi = result["doi"]

                    success = self.update_library_metadata(
                        paper_id=paper_id,
                        project=project,
                        doi=doi,
                        metadata={
                            "title": title,
                            "title_source": "input",
                            "year": paper.get("year"),
                            "year_source": "input" if paper.get("year") else None,
                            "authors": paper.get("authors"),
                            "authors_source": "input" if paper.get("authors") else None,
                            "journal": paper.get("journal"),
                            "journal_source": "input" if paper.get("journal") else None,
                            "doi_resolution_source": result.get("source"),
                        },
                    )

                    if success:
                        results[paper_id] = doi
                        logger.success(f"{paper_id}: {doi}")
                    else:
                        logger.error(
                            f"{paper_id}: DOI resolved but metadata update failed"
                        )
                else:
                    logger.warning(f"{paper_id}: No DOI found")

            except Exception as exc_:
                logger.error(f"{paper_id}: Error during resolution: {exc_}")

        logger.success(
            f"Resolved {len(results)}/{len(papers_with_ids)} DOIs and updated library metadata"
        )
        return results

    def resolve_and_create_library_structure(
        self,
        papers: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
    ) -> Dict[str, Dict[str, str]]:
        """Synchronous wrapper for resolve_and_create_library_structure_async.

        Runs the coroutine exactly once on a fresh event loop.

        Raises:
            RuntimeError: If called while an event loop is already running in
                this thread; use the ``_async`` variant there instead. Any
                exception raised by the coroutine itself propagates unchanged.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so we may drive one ourselves.
            pass
        else:
            # Raised outside the try block so it cannot be swallowed, and
            # before the coroutine object is created so nothing is left
            # un-awaited.
            raise RuntimeError(
                "Cannot run synchronous version in async context. "
                "Use resolve_and_create_library_structure_async() instead."
            )

        return asyncio.run(
            self.resolve_and_create_library_structure_async(papers, project, sources)
        )
216
+
217
+
218
+ # EOF
@@ -0,0 +1,226 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-01-24 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_metadata_conversion.py
4
+
5
+ """
6
+ Metadata conversion mixin for LibraryManager.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import copy
12
+ from collections import OrderedDict
13
+ from typing import Any, Dict
14
+
15
+ from scitex.scholar.metadata_engines.utils import BASE_STRUCTURE
16
+
17
+
18
class MetadataConversionMixin:
    """Mixin providing metadata conversion methods."""

    def _dotdict_to_dict(self, obj):
        """Return ``obj`` with every DotDict replaced by a plain dict.

        Dicts and lists are rebuilt recursively; any other value is returned
        unchanged. Intended for preparing data for JSON serialization.
        """
        from scitex.dict import DotDict

        recurse = self._dotdict_to_dict
        if isinstance(obj, DotDict):
            return {key: recurse(val) for key, val in obj._data.items()}
        if isinstance(obj, dict):
            return {key: recurse(val) for key, val in obj.items()}
        if isinstance(obj, list):
            return [recurse(element) for element in obj]
        return obj

    def _add_engine_to_list(self, engines_list: list, source: str) -> None:
        """Append ``source`` to ``engines_list`` unless it is falsy or already there."""
        if not source or source in engines_list:
            return
        engines_list.append(source)

    def _convert_to_standardized_metadata(self, flat_metadata: Dict) -> OrderedDict:
        """Build the nested standardized structure from a flat metadata dict.

        Starts from a deep copy of ``BASE_STRUCTURE`` and fills the ``id``,
        ``basic``, ``citation_count``, ``publication``, ``url`` and ``path``
        sections from the keys present in ``flat_metadata``.
        """
        standardized = copy.deepcopy(BASE_STRUCTURE)

        # --- id ---
        if "doi" in flat_metadata:
            id_section = standardized["id"]
            id_section["doi"] = flat_metadata["doi"]
            self._add_engine_to_list(
                id_section["doi_engines"], flat_metadata.get("doi_source")
            )
        if "scitex_id" in flat_metadata:
            standardized["id"]["scholar_id"] = flat_metadata["scitex_id"]

        # --- basic: each field is copied and its "<field>_source" recorded ---
        for field_name in ("title", "authors", "year", "abstract"):
            if field_name not in flat_metadata:
                continue
            basic_section = standardized["basic"]
            basic_section[field_name] = flat_metadata[field_name]
            self._add_engine_to_list(
                basic_section[f"{field_name}_engines"],
                flat_metadata.get(f"{field_name}_source"),
            )

        # --- citation_count / publication / url ---
        self._convert_citation_count(flat_metadata, standardized)
        self._convert_publication_fields(flat_metadata, standardized)
        self._convert_url_fields(flat_metadata, standardized)

        # --- path ---
        if "pdf_path" in flat_metadata:
            path_section = standardized["path"]
            path_section["pdfs"] = [flat_metadata["pdf_path"]]
            self._add_engine_to_list(
                path_section["pdfs_engines"],
                "ScholarPDFDownloaderWithScreenshotsParallel",
            )

        return standardized

    def _convert_citation_count(
        self, flat_metadata: Dict, standardized: OrderedDict
    ) -> None:
        """Copy citation counts into ``standardized["citation_count"]``.

        A dict value supplies ``total`` plus per-year counts for 2025 down to
        2015 (with their ``*_source`` entries); any other value is stored as
        ``total`` with ``citation_count_source`` as its engine.
        """
        if "citation_count" not in flat_metadata:
            return

        raw_count = flat_metadata["citation_count"]

        if not isinstance(raw_count, dict):
            # Scalar form: only the total is known.
            standardized["citation_count"]["total"] = raw_count
            self._add_engine_to_list(
                standardized["citation_count"]["total_engines"],
                flat_metadata.get("citation_count_source"),
            )
            return

        standardized["citation_count"]["total"] = raw_count.get("total")
        self._add_engine_to_list(
            standardized["citation_count"]["total_engines"],
            raw_count.get("total_source"),
        )
        for year in map(str, range(2025, 2014, -1)):
            if year not in raw_count:
                continue
            standardized["citation_count"][year] = raw_count[year]
            source_key = f"{year}_source"
            if source_key in raw_count:
                self._add_engine_to_list(
                    standardized["citation_count"][f"{year}_engines"],
                    raw_count.get(source_key),
                )

    def _convert_publication_fields(
        self, flat_metadata: Dict, standardized: OrderedDict
    ) -> None:
        """Copy publication fields into ``standardized["publication"]``.

        ``pages`` is only used when it contains a ``-``; it is then split at
        the first ``-`` into stripped ``first_page`` / ``last_page`` values.
        """
        if "journal" in flat_metadata:
            publication = standardized["publication"]
            publication["journal"] = flat_metadata["journal"]
            self._add_engine_to_list(
                publication["journal_engines"], flat_metadata.get("journal_source")
            )

        # Fields copied verbatim under the same key.
        for plain_key in ("short_journal", "impact_factor", "issn", "volume", "issue"):
            if plain_key in flat_metadata:
                standardized["publication"][plain_key] = flat_metadata[plain_key]

        if "pages" in flat_metadata:
            pages = flat_metadata["pages"]
            if pages:
                head, dash, tail = str(pages).partition("-")
                if dash:
                    standardized["publication"]["first_page"] = head.strip()
                    standardized["publication"]["last_page"] = tail.strip()

        if "publisher" in flat_metadata:
            standardized["publication"]["publisher"] = flat_metadata["publisher"]

    def _convert_url_fields(
        self, flat_metadata: Dict, standardized: OrderedDict
    ) -> None:
        """Copy URL fields into ``standardized["url"]``.

        For the publisher, resolved-OpenURL and PDF entries,
        ``"ScholarURLFinder"`` is also recorded in the matching
        ``*_engines`` list.
        """
        # (flat key, key inside standardized["url"], record engine?)
        url_table = (
            ("url_doi", "doi", False),
            ("url_publisher", "publisher", True),
            ("url_openurl_query", "openurl_query", False),
            ("url_openurl_resolved", "openurl_resolved", True),
            ("urls_pdf", "pdfs", True),
        )
        for flat_key, target_key, records_engine in url_table:
            if flat_key not in flat_metadata:
                continue
            standardized["url"][target_key] = flat_metadata[flat_key]
            if records_engine:
                self._add_engine_to_list(
                    standardized["url"][f"{target_key}_engines"], "ScholarURLFinder"
                )

    def _call_path_manager_get_storage_paths(
        self, paper_info: Dict, collection_name: str = "MASTER"
    ) -> Dict[str, Any]:
        """Call ``path_manager.get_paper_storage_paths`` for ``paper_info``.

        Passes ``doi``, ``title``, ``authors`` (default ``[]``), ``year`` and
        ``journal`` from ``paper_info`` plus ``collection_name`` as
        ``project``, and repackages the returned 3-tuple as a dict with keys
        ``storage_path``, ``readable_name`` and ``unique_id``.
        """
        path_manager = self.config.path_manager
        resolved = path_manager.get_paper_storage_paths(
            doi=paper_info.get("doi"),
            title=paper_info.get("title"),
            authors=paper_info.get("authors", []),
            year=paper_info.get("year"),
            journal=paper_info.get("journal"),
            project=collection_name,
        )
        storage_path, readable_name, paper_id = resolved

        return {
            "storage_path": storage_path,
            "readable_name": readable_name,
            "unique_id": paper_id,
        }
224
+
225
+
226
+ # EOF