scitex 2.14.0__py3-none-any.whl → 2.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +71 -17
- scitex/_env_loader.py +156 -0
- scitex/_mcp_resources/__init__.py +37 -0
- scitex/_mcp_resources/_cheatsheet.py +135 -0
- scitex/_mcp_resources/_figrecipe.py +138 -0
- scitex/_mcp_resources/_formats.py +102 -0
- scitex/_mcp_resources/_modules.py +337 -0
- scitex/_mcp_resources/_session.py +149 -0
- scitex/_mcp_tools/__init__.py +4 -0
- scitex/_mcp_tools/audio.py +66 -0
- scitex/_mcp_tools/diagram.py +11 -95
- scitex/_mcp_tools/introspect.py +210 -0
- scitex/_mcp_tools/plt.py +260 -305
- scitex/_mcp_tools/scholar.py +74 -0
- scitex/_mcp_tools/social.py +27 -0
- scitex/_mcp_tools/template.py +24 -0
- scitex/_mcp_tools/writer.py +17 -210
- scitex/ai/_gen_ai/_PARAMS.py +10 -7
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
- scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
- scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
- scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
- scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
- scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
- scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
- scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
- scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
- scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
- scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
- scitex/audio/README.md +40 -36
- scitex/audio/__init__.py +129 -61
- scitex/audio/_branding.py +185 -0
- scitex/audio/_mcp/__init__.py +32 -0
- scitex/audio/_mcp/handlers.py +59 -6
- scitex/audio/_mcp/speak_handlers.py +238 -0
- scitex/audio/_relay.py +225 -0
- scitex/audio/_tts.py +18 -10
- scitex/audio/engines/base.py +17 -10
- scitex/audio/engines/elevenlabs_engine.py +7 -2
- scitex/audio/mcp_server.py +228 -75
- scitex/canvas/README.md +1 -1
- scitex/canvas/editor/_dearpygui/__init__.py +25 -0
- scitex/canvas/editor/_dearpygui/_editor.py +147 -0
- scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
- scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
- scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
- scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
- scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
- scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
- scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
- scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
- scitex/canvas/editor/_dearpygui/_selection.py +295 -0
- scitex/canvas/editor/_dearpygui/_state.py +93 -0
- scitex/canvas/editor/_dearpygui/_utils.py +61 -0
- scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
- scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
- scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
- scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
- scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
- scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
- scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
- scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
- scitex/canvas/editor/flask_editor/_core.py +25 -1684
- scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
- scitex/cli/__init__.py +38 -43
- scitex/cli/audio.py +160 -41
- scitex/cli/capture.py +133 -20
- scitex/cli/introspect.py +488 -0
- scitex/cli/main.py +200 -109
- scitex/cli/mcp.py +60 -34
- scitex/cli/plt.py +414 -0
- scitex/cli/repro.py +15 -8
- scitex/cli/resource.py +15 -8
- scitex/cli/scholar/__init__.py +154 -8
- scitex/cli/scholar/_crossref_scitex.py +296 -0
- scitex/cli/scholar/_fetch.py +25 -3
- scitex/cli/social.py +355 -0
- scitex/cli/stats.py +136 -11
- scitex/cli/template.py +129 -12
- scitex/cli/tex.py +15 -8
- scitex/cli/writer.py +49 -299
- scitex/cloud/__init__.py +41 -2
- scitex/config/README.md +1 -1
- scitex/config/__init__.py +16 -2
- scitex/config/_env_registry.py +256 -0
- scitex/context/__init__.py +22 -0
- scitex/dev/__init__.py +20 -1
- scitex/diagram/__init__.py +42 -19
- scitex/diagram/mcp_server.py +13 -125
- scitex/gen/__init__.py +50 -14
- scitex/gen/_list_packages.py +4 -4
- scitex/introspect/__init__.py +82 -0
- scitex/introspect/_call_graph.py +303 -0
- scitex/introspect/_class_hierarchy.py +163 -0
- scitex/introspect/_core.py +41 -0
- scitex/introspect/_docstring.py +131 -0
- scitex/introspect/_examples.py +113 -0
- scitex/introspect/_imports.py +271 -0
- scitex/{gen/_inspect_module.py → introspect/_list_api.py} +48 -56
- scitex/introspect/_mcp/__init__.py +41 -0
- scitex/introspect/_mcp/handlers.py +233 -0
- scitex/introspect/_members.py +155 -0
- scitex/introspect/_resolve.py +89 -0
- scitex/introspect/_signature.py +131 -0
- scitex/introspect/_source.py +80 -0
- scitex/introspect/_type_hints.py +172 -0
- scitex/io/_save.py +1 -2
- scitex/io/bundle/README.md +1 -1
- scitex/logging/_formatters.py +19 -9
- scitex/mcp_server.py +98 -5
- scitex/os/__init__.py +4 -0
- scitex/{gen → os}/_check_host.py +4 -5
- scitex/plt/__init__.py +245 -550
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
- scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/plt/gallery/README.md +1 -1
- scitex/plt/utils/_hitmap/__init__.py +82 -0
- scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
- scitex/plt/utils/_hitmap/_color_application.py +346 -0
- scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
- scitex/plt/utils/_hitmap/_constants.py +40 -0
- scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
- scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
- scitex/plt/utils/_hitmap/_query.py +113 -0
- scitex/plt/utils/_hitmap.py +46 -1616
- scitex/plt/utils/_metadata/__init__.py +80 -0
- scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
- scitex/plt/utils/_metadata/_artists/_base.py +195 -0
- scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
- scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
- scitex/plt/utils/_metadata/_artists/_images.py +80 -0
- scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
- scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
- scitex/plt/utils/_metadata/_artists/_text.py +106 -0
- scitex/plt/utils/_metadata/_csv.py +416 -0
- scitex/plt/utils/_metadata/_detect.py +225 -0
- scitex/plt/utils/_metadata/_legend.py +127 -0
- scitex/plt/utils/_metadata/_rounding.py +117 -0
- scitex/plt/utils/_metadata/_verification.py +202 -0
- scitex/schema/README.md +1 -1
- scitex/scholar/__init__.py +8 -0
- scitex/scholar/_mcp/crossref_handlers.py +265 -0
- scitex/scholar/core/Scholar.py +63 -1700
- scitex/scholar/core/_mixins/__init__.py +36 -0
- scitex/scholar/core/_mixins/_enrichers.py +270 -0
- scitex/scholar/core/_mixins/_library_handlers.py +100 -0
- scitex/scholar/core/_mixins/_loaders.py +103 -0
- scitex/scholar/core/_mixins/_pdf_download.py +375 -0
- scitex/scholar/core/_mixins/_pipeline.py +312 -0
- scitex/scholar/core/_mixins/_project_handlers.py +125 -0
- scitex/scholar/core/_mixins/_savers.py +69 -0
- scitex/scholar/core/_mixins/_search.py +103 -0
- scitex/scholar/core/_mixins/_services.py +88 -0
- scitex/scholar/core/_mixins/_url_finding.py +105 -0
- scitex/scholar/crossref_scitex.py +367 -0
- scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/scholar/examples/00_run_all.sh +120 -0
- scitex/scholar/jobs/_executors.py +27 -3
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
- scitex/scholar/pdf_download/_cli.py +154 -0
- scitex/scholar/pdf_download/strategies/__init__.py +11 -8
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
- scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
- scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
- scitex/scholar/pipelines/_single_steps.py +71 -36
- scitex/scholar/storage/_LibraryManager.py +97 -1695
- scitex/scholar/storage/_mixins/__init__.py +30 -0
- scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
- scitex/scholar/storage/_mixins/_library_operations.py +218 -0
- scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
- scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
- scitex/scholar/storage/_mixins/_resolution.py +376 -0
- scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
- scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
- scitex/security/README.md +3 -3
- scitex/session/README.md +1 -1
- scitex/session/__init__.py +26 -7
- scitex/session/_decorator.py +1 -1
- scitex/sh/README.md +1 -1
- scitex/sh/__init__.py +7 -4
- scitex/social/__init__.py +155 -0
- scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
- scitex/stats/_mcp/_handlers/__init__.py +31 -0
- scitex/stats/_mcp/_handlers/_corrections.py +113 -0
- scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
- scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
- scitex/stats/_mcp/_handlers/_format.py +94 -0
- scitex/stats/_mcp/_handlers/_normality.py +110 -0
- scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
- scitex/stats/_mcp/_handlers/_power.py +247 -0
- scitex/stats/_mcp/_handlers/_recommend.py +102 -0
- scitex/stats/_mcp/_handlers/_run_test.py +279 -0
- scitex/stats/_mcp/_handlers/_stars.py +48 -0
- scitex/stats/_mcp/handlers.py +19 -1171
- scitex/stats/auto/_stat_style.py +175 -0
- scitex/stats/auto/_style_definitions.py +411 -0
- scitex/stats/auto/_styles.py +22 -620
- scitex/stats/descriptive/__init__.py +11 -8
- scitex/stats/descriptive/_ci.py +39 -0
- scitex/stats/power/_power.py +15 -4
- scitex/str/__init__.py +2 -1
- scitex/str/_title_case.py +63 -0
- scitex/template/README.md +1 -1
- scitex/template/__init__.py +25 -10
- scitex/template/_code_templates.py +147 -0
- scitex/template/_mcp/handlers.py +81 -0
- scitex/template/_mcp/tool_schemas.py +55 -0
- scitex/template/_templates/__init__.py +51 -0
- scitex/template/_templates/audio.py +233 -0
- scitex/template/_templates/canvas.py +312 -0
- scitex/template/_templates/capture.py +268 -0
- scitex/template/_templates/config.py +43 -0
- scitex/template/_templates/diagram.py +294 -0
- scitex/template/_templates/io.py +107 -0
- scitex/template/_templates/module.py +53 -0
- scitex/template/_templates/plt.py +202 -0
- scitex/template/_templates/scholar.py +267 -0
- scitex/template/_templates/session.py +130 -0
- scitex/template/_templates/session_minimal.py +43 -0
- scitex/template/_templates/session_plot.py +67 -0
- scitex/template/_templates/session_stats.py +77 -0
- scitex/template/_templates/stats.py +323 -0
- scitex/template/_templates/writer.py +296 -0
- scitex/template/clone_writer_directory.py +5 -5
- scitex/ui/_backends/_email.py +10 -2
- scitex/ui/_backends/_webhook.py +5 -1
- scitex/web/_search_pubmed.py +10 -6
- scitex/writer/README.md +1 -1
- scitex/writer/__init__.py +43 -34
- scitex/writer/_mcp/handlers.py +11 -744
- scitex/writer/_mcp/tool_schemas.py +5 -335
- scitex-2.15.3.dist-info/METADATA +667 -0
- {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/RECORD +241 -120
- scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
- scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
- scitex/diagram/_compile.py +0 -312
- scitex/diagram/_diagram.py +0 -355
- scitex/diagram/_mcp/__init__.py +0 -4
- scitex/diagram/_mcp/handlers.py +0 -400
- scitex/diagram/_mcp/tool_schemas.py +0 -157
- scitex/diagram/_presets.py +0 -173
- scitex/diagram/_schema.py +0 -182
- scitex/diagram/_split.py +0 -278
- scitex/gen/_ci.py +0 -12
- scitex/gen/_title_case.py +0 -89
- scitex/plt/_mcp/__init__.py +0 -4
- scitex/plt/_mcp/_handlers_annotation.py +0 -102
- scitex/plt/_mcp/_handlers_figure.py +0 -195
- scitex/plt/_mcp/_handlers_plot.py +0 -252
- scitex/plt/_mcp/_handlers_style.py +0 -219
- scitex/plt/_mcp/handlers.py +0 -74
- scitex/plt/_mcp/tool_schemas.py +0 -497
- scitex/plt/mcp_server.py +0 -231
- scitex/scholar/examples/SUGGESTIONS.md +0 -865
- scitex/scholar/examples/dev.py +0 -38
- scitex-2.14.0.dist-info/METADATA +0 -1238
- /scitex/{gen → context}/_detect_environment.py +0 -0
- /scitex/{gen → context}/_get_notebook_path.py +0 -0
- /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/WHEEL +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/entry_points.txt +0 -0
- {scitex-2.14.0.dist-info → scitex-2.15.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Timestamp: "2026-01-24 (ywatanabe)"
|
|
3
|
+
# File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/__init__.py
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Mixin classes for LibraryManager.
|
|
7
|
+
|
|
8
|
+
Each mixin provides a specific set of methods for the manager class.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from ._bibtex_handlers import BibtexHandlersMixin
|
|
12
|
+
from ._library_operations import LibraryOperationsMixin
|
|
13
|
+
from ._metadata_conversion import MetadataConversionMixin
|
|
14
|
+
from ._paper_saving import PaperSavingMixin
|
|
15
|
+
from ._resolution import ResolutionMixin
|
|
16
|
+
from ._storage_helpers import StorageHelpersMixin
|
|
17
|
+
from ._symlink_handlers import SymlinkHandlersMixin
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"StorageHelpersMixin",
|
|
21
|
+
"MetadataConversionMixin",
|
|
22
|
+
"PaperSavingMixin",
|
|
23
|
+
"ResolutionMixin",
|
|
24
|
+
"SymlinkHandlersMixin",
|
|
25
|
+
"BibtexHandlersMixin",
|
|
26
|
+
"LibraryOperationsMixin",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# EOF
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Timestamp: "2026-01-24 (ywatanabe)"
|
|
3
|
+
# File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_bibtex_handlers.py
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
BibTeX handling mixin for LibraryManager.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, Optional
|
|
14
|
+
|
|
15
|
+
from scitex import logging
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BibtexHandlersMixin:
    """Mixin providing BibTeX handling methods.

    The host class must expose ``self.config.path_manager`` with a
    ``get_library_project_dir(project)`` method whose result supports the
    ``/`` path operator (it is used like a ``pathlib.Path``).
    """

    def _create_bibtex_info_structure(
        self,
        project: str,
        paper_info: Dict[str, Any],
        complete_metadata: Dict[str, Any],
        bibtex_source_filename: str = "papers",
    ) -> Optional[Path]:
        """Append a paper to ``info/<name>_bib/<name>.bib`` of a project.

        ``<name>`` is ``bibtex_source_filename``. When the metadata carries no
        DOI, a JSON stub describing the paper is additionally written to
        ``info/<name>_bib/unresolved/<entry_key>.json``.

        Args:
            project: Project name passed to ``get_library_project_dir``.
            paper_info: Not read by this method; kept for interface
                compatibility.
            complete_metadata: Flat metadata dict (``title``, ``authors``,
                ``year``, ``doi``, ...).
            bibtex_source_filename: Stem used for the directory and the
                ``.bib`` file.

        Returns:
            The ``info/<name>_bib`` directory, or ``None`` when anything
            failed (the failure is logged as a warning, never raised).
        """
        try:
            project_dir = self.config.path_manager.get_library_project_dir(project)
            info_dir = project_dir / "info" / f"{bibtex_source_filename}_bib"
            unresolved_dir = info_dir / "unresolved"
            # parents=True also creates info_dir itself.
            unresolved_dir.mkdir(parents=True, exist_ok=True)

            bibtex_file = info_dir / f"{bibtex_source_filename}.bib"
            entry_key = self._derive_bibtex_entry_key(complete_metadata)
            bibtex_entry = self._generate_bibtex_entry(complete_metadata, entry_key)

            # Existing files get a blank separator line before the new entry;
            # append mode creates the file when it does not exist yet.
            separator = "\n" if bibtex_file.exists() else ""
            with open(bibtex_file, "a", encoding="utf-8") as file_:
                file_.write(f"{separator}{bibtex_entry}")

            if not complete_metadata.get("doi"):
                unresolved_file = unresolved_dir / f"{entry_key}.json"
                unresolved_data = {
                    "title": complete_metadata.get("title", ""),
                    "authors": complete_metadata.get("authors", []),
                    "year": complete_metadata.get("year", ""),
                    "journal": complete_metadata.get("journal", ""),
                    "scholar_id": complete_metadata.get("scholar_id", ""),
                    "resolution_failed": True,
                    "timestamp": complete_metadata.get("created_at", ""),
                }
                with open(unresolved_file, "w", encoding="utf-8") as file_:
                    json.dump(unresolved_data, file_, indent=2)
                logger.info(f"Added unresolved entry: {unresolved_file}")

            logger.success(f"Updated BibTeX info structure: {bibtex_file}")
            return info_dir

        except Exception as exc_:
            # Best effort by design: bookkeeping failures must not abort the
            # caller, so they are reported and signalled through ``None``.
            logger.warning(f"Failed to create BibTeX info structure: {exc_}")
            return None

    @staticmethod
    def _derive_bibtex_entry_key(metadata: Dict[str, Any]) -> str:
        """Build the ``<lastword><year>`` citation key for *metadata*.

        The author part is the lower-cased last whitespace-separated word of
        the first author (list input) or of the whole string (string input).
        Missing, empty or whitespace-only authors and a missing/``None``/empty
        year each fall back to ``"unknown"`` instead of raising or producing
        keys such as ``"smithNone"``.
        """
        authors = metadata.get("authors")
        if isinstance(authors, list):
            candidate = str(authors[0]) if authors else ""
        elif isinstance(authors, str):
            candidate = authors
        else:
            candidate = ""

        name_parts = candidate.split()
        first_author = name_parts[-1].lower() if name_parts else "unknown"

        year = metadata.get("year")
        if year is None or year == "":
            year = "unknown"

        return f"{first_author}{year}"

    def _generate_bibtex_entry(self, metadata: Dict[str, Any], entry_key: str) -> str:
        """Render *metadata* as a single BibTeX entry string.

        The entry type is ``article`` when a journal is present,
        ``inproceedings`` when only a booktitle is present, ``book`` when only
        a publisher is present, and ``article`` otherwise. Each non-empty
        field is followed by a ``% <field>_source = ...`` line when the
        metadata has a matching ``<key>_source`` item. Three trailing ``%``
        lines record ``scholar_id``, ``created_at`` and ``created_by``.

        Args:
            metadata: Flat metadata dict.
            entry_key: Citation key placed after the opening brace.

        Returns:
            The entry text, terminated by ``"}\\n"``.
        """
        if metadata.get("journal"):
            entry_type = "article"
        elif metadata.get("booktitle"):
            entry_type = "inproceedings"
        elif metadata.get("publisher"):
            entry_type = "book"
        else:
            entry_type = "article"

        # metadata key -> BibTeX field name (insertion order is output order)
        field_mappings = {
            "title": "title",
            "authors": "author",
            "year": "year",
            "journal": "journal",
            "doi": "doi",
            "volume": "volume",
            "issue": "number",
            "pages": "pages",
            "publisher": "publisher",
            "booktitle": "booktitle",
            "abstract": "abstract",
        }

        chunks = [f"@{entry_type}{{{entry_key},\n"]

        for meta_field, bibtex_field in field_mappings.items():
            value = metadata.get(meta_field)
            if not value:
                continue

            if isinstance(value, list):
                value = " and ".join(str(val_) for val_ in value)
            # Escape braces so the value cannot unbalance the entry.
            value_escaped = str(value).replace("{", "\\{").replace("}", "\\}")
            chunks.append(f" {bibtex_field} = {{{value_escaped}}},\n")

            source_field = f"{meta_field}_source"
            if source_field in metadata:
                chunks.append(
                    f" % {bibtex_field}_source = {metadata[source_field]}\n"
                )

        chunks.append(f" % scholar_id = {metadata.get('scholar_id', 'unknown')},\n")
        chunks.append(f" % created_at = {metadata.get('created_at', 'unknown')},\n")
        chunks.append(f" % created_by = {metadata.get('created_by', 'unknown')},\n")
        chunks.append("}\n")

        return "".join(chunks)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# EOF
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Timestamp: "2026-01-24 (ywatanabe)"
|
|
3
|
+
# File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_library_operations.py
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Library operations mixin for LibraryManager.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import json
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from scitex import logging
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class LibraryOperationsMixin:
    """Mixin providing library operation methods.

    The host class must supply ``self.config.path_manager`` (with
    ``library_dir`` and ``_ensure_directory``), ``self.single_doi_resolver``
    (with the coroutine ``metadata2doi_async``) and the coroutine method
    ``resolve_and_create_library_structure_async``.
    """

    def update_library_metadata(
        self,
        paper_id: str,
        project: str,
        doi: str,
        metadata: Dict[str, Any],
        create_structure: bool = True,
    ) -> bool:
        """Merge a resolved DOI into ``<library>/<project>/<paper_id>/metadata.json``.

        Existing file content is loaded first (an unreadable file is logged
        and treated as empty), then overlaid with *metadata* and finally with
        ``doi``, ``doi_resolved_at`` (current local time, ISO format) and
        ``doi_source`` (``"batch_doi_resolver"``).

        Args:
            paper_id: Directory name of the paper inside the project.
            project: Project directory name inside the library.
            doi: Resolved DOI to store.
            metadata: Additional fields; they override existing values.
            create_structure: Create the paper directory when it is missing.

        Returns:
            ``True`` on success, ``False`` when any step failed (the error is
            logged, not raised).
        """
        try:
            library_path = self.config.path_manager.library_dir
            paper_dir = library_path / project / paper_id
            metadata_file = paper_dir / "metadata.json"

            if create_structure and not paper_dir.exists():
                self.config.path_manager._ensure_directory(paper_dir)
                logger.info(f"Created Scholar library structure: {paper_dir}")

            existing_metadata = {}
            if metadata_file.exists():
                try:
                    # Explicit UTF-8 so the result does not depend on the
                    # platform's locale encoding.
                    with open(metadata_file, encoding="utf-8") as file_:
                        existing_metadata = json.load(file_)
                except Exception as exc_:
                    logger.warning(f"Error loading existing metadata: {exc_}")

            # Later items win: caller metadata overrides the file, and the
            # DOI bookkeeping fields override both.
            updated_metadata = {
                **existing_metadata,
                **metadata,
                "doi": doi,
                "doi_resolved_at": datetime.now().isoformat(),
                "doi_source": "batch_doi_resolver",
            }

            with open(metadata_file, "w", encoding="utf-8") as file_:
                json.dump(updated_metadata, file_, indent=2)

            logger.success(f"Updated metadata for {paper_id}: DOI {doi}")
            return True

        except Exception as exc_:
            logger.error(f"Error updating library metadata for {paper_id}: {exc_}")
            return False

    def create_writer_directory_structure(self, paper_id: str, project: str) -> Path:
        """Create the paper directory plus its ``attachments`` and ``screenshots`` subdirectories.

        Returns:
            The paper directory ``<library>/<project>/<paper_id>``.
        """
        library_path = self.config.path_manager.library_dir
        paper_dir = library_path / project / paper_id

        self.config.path_manager._ensure_directory(paper_dir)

        for subdir in ["attachments", "screenshots"]:
            self.config.path_manager._ensure_directory(paper_dir / subdir)

        logger.info(f"Created Scholar library structure: {paper_dir}")
        return paper_dir

    def validate_library_structure(self, project: str) -> Dict[str, Any]:
        """Inspect a project directory and report papers lacking ``metadata.json``.

        Only sub-directories whose name is exactly 8 characters long are
        counted (presumably the paper-ID length; confirm against the ID
        generator).

        Returns:
            A dict with ``valid``, ``warnings``, ``errors``, ``paper_count``
            and ``missing_metadata``. ``valid`` becomes ``False`` only when
            the project directory does not exist.
        """
        validation = {
            "valid": True,
            "warnings": [],
            "errors": [],
            "paper_count": 0,
            "missing_metadata": [],
        }

        library_path = self.config.path_manager.library_dir
        project_dir = library_path / project

        if not project_dir.exists():
            validation["errors"].append(
                f"Project directory does not exist: {project_dir}"
            )
            validation["valid"] = False
            return validation

        for paper_dir in project_dir.iterdir():
            if not (paper_dir.is_dir() and len(paper_dir.name) == 8):
                continue

            validation["paper_count"] += 1

            if not (paper_dir / "metadata.json").exists():
                validation["missing_metadata"].append(paper_dir.name)
                validation["warnings"].append(
                    f"Missing metadata.json: {paper_dir.name}"
                )

        return validation

    def resolve_and_update_library(
        self,
        papers_with_ids: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
    ) -> Dict[str, str]:
        """Resolve DOIs and update Scholar library metadata.json files.

        Papers without ``paper_id`` or ``title`` are skipped with a warning.
        Each remaining paper is resolved through
        ``self.single_doi_resolver.metadata2doi_async`` (run to completion
        with ``asyncio.run``); errors for one paper are logged and do not
        stop the batch.

        Args:
            papers_with_ids: Paper dicts; ``paper_id`` and ``title`` are
                required, ``year``/``authors``/``journal`` are optional.
            project: Project whose library entries are updated.
            sources: Forwarded unchanged to the resolver.

        Returns:
            Mapping of ``paper_id`` to DOI for every paper that was both
            resolved and written successfully.

        Raises:
            ValueError: If ``self.single_doi_resolver`` is falsy.
        """
        if not self.single_doi_resolver:
            raise ValueError("SingleDOIResolver is required for resolving DOIs")

        results = {}
        for paper in papers_with_ids:
            paper_id = paper.get("paper_id")
            if not paper_id:
                logger.warning(
                    f"Skipping paper without paper_id: {paper.get('title', 'Unknown')}"
                )
                continue

            title = paper.get("title")
            if not title:
                logger.warning(f"Skipping paper {paper_id} without title")
                continue

            logger.info(f"Resolving DOI for {paper_id}: {title[:50]}...")

            try:
                result = asyncio.run(
                    self.single_doi_resolver.metadata2doi_async(
                        title=title,
                        year=paper.get("year"),
                        authors=paper.get("authors"),
                        sources=sources,
                    )
                )

                if result and isinstance(result, dict) and result.get("doi"):
                    doi = result["doi"]

                    success = self.update_library_metadata(
                        paper_id=paper_id,
                        project=project,
                        doi=doi,
                        metadata={
                            "title": title,
                            "title_source": "input",
                            "year": paper.get("year"),
                            "year_source": "input" if paper.get("year") else None,
                            "authors": paper.get("authors"),
                            "authors_source": "input" if paper.get("authors") else None,
                            "journal": paper.get("journal"),
                            "journal_source": "input" if paper.get("journal") else None,
                            "doi_resolution_source": result.get("source"),
                        },
                    )

                    if success:
                        results[paper_id] = doi
                        logger.success(f"{paper_id}: {doi}")
                    else:
                        logger.error(
                            f"{paper_id}: DOI resolved but metadata update failed"
                        )
                else:
                    logger.warning(f"{paper_id}: No DOI found")

            except Exception as exc_:
                logger.error(f"{paper_id}: Error during resolution: {exc_}")

        logger.success(
            f"Resolved {len(results)}/{len(papers_with_ids)} DOIs and updated library metadata"
        )
        return results

    def resolve_and_create_library_structure(
        self,
        papers: List[Dict[str, Any]],
        project: str,
        sources: Optional[List[str]] = None,
    ) -> Dict[str, Dict[str, str]]:
        """Synchronous wrapper for resolve_and_create_library_structure_async.

        The running-loop check happens before the coroutine is created, and
        outside any ``try`` that could swallow the resulting error, so the
        explanatory message reaches the caller and the coroutine is executed
        at most once.

        Raises:
            RuntimeError: If called from a thread that already runs an event
                loop; await the ``_async`` variant there instead.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # get_running_loop() raises when no loop runs in this thread.
            loop_is_running = False
        else:
            loop_is_running = True

        if loop_is_running:
            raise RuntimeError(
                "Cannot run synchronous version in async context. "
                "Use resolve_and_create_library_structure_async() instead."
            )

        return asyncio.run(
            self.resolve_and_create_library_structure_async(papers, project, sources)
        )
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# EOF
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Timestamp: "2026-01-24 (ywatanabe)"
|
|
3
|
+
# File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/storage/_mixins/_metadata_conversion.py
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Metadata conversion mixin for LibraryManager.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import copy
|
|
12
|
+
from collections import OrderedDict
|
|
13
|
+
from typing import Any, Dict
|
|
14
|
+
|
|
15
|
+
from scitex.scholar.metadata_engines.utils import BASE_STRUCTURE
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MetadataConversionMixin:
|
|
19
|
+
"""Mixin providing metadata conversion methods."""
|
|
20
|
+
|
|
21
|
+
def _dotdict_to_dict(self, obj):
    """Return *obj* with every DotDict replaced by a plain ``dict``.

    DotDict instances (via their ``_data`` mapping), dicts and lists are
    rebuilt recursively; any other value is returned unchanged. Used to make
    nested structures JSON serializable.
    """
    # Imported lazily, exactly as before, so importing this module does not
    # pull in scitex.dict.
    from scitex.dict import DotDict

    convert = self._dotdict_to_dict

    # DotDict is tested first, ahead of the generic dict branch.
    if isinstance(obj, DotDict):
        return {key: convert(value) for key, value in obj._data.items()}
    if isinstance(obj, dict):
        return {key: convert(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert(element) for element in obj]
    return obj
|
|
33
|
+
|
|
34
|
+
def _add_engine_to_list(self, engines_list: list, source: str) -> None:
|
|
35
|
+
"""Helper to add source to engines list if not already present."""
|
|
36
|
+
if source and source not in engines_list:
|
|
37
|
+
engines_list.append(source)
|
|
38
|
+
|
|
39
|
+
def _convert_to_standardized_metadata(self, flat_metadata: Dict) -> OrderedDict:
    """Build the nested standard metadata structure from a flat dict.

    Starts from a deep copy of ``BASE_STRUCTURE`` and fills the ``id``,
    ``basic``, ``citation_count``, ``publication``, ``url`` and ``path``
    sections from the keys present in *flat_metadata*. For fields that have
    an ``*_engines`` list, the matching ``<field>_source`` value is recorded
    there.
    """
    standardized = copy.deepcopy(BASE_STRUCTURE)

    # --- id -----------------------------------------------------------
    id_section = standardized["id"]
    if "doi" in flat_metadata:
        id_section["doi"] = flat_metadata["doi"]
        self._add_engine_to_list(
            id_section["doi_engines"], flat_metadata.get("doi_source")
        )
    if "scitex_id" in flat_metadata:
        # The flat key is "scitex_id" but the structured key is "scholar_id".
        id_section["scholar_id"] = flat_metadata["scitex_id"]

    # --- basic --------------------------------------------------------
    basic_section = standardized["basic"]
    for field in ("title", "authors", "year", "abstract"):
        if field not in flat_metadata:
            continue
        basic_section[field] = flat_metadata[field]
        self._add_engine_to_list(
            basic_section[f"{field}_engines"],
            flat_metadata.get(f"{field}_source"),
        )

    # --- citation_count / publication / url -----------------------------
    self._convert_citation_count(flat_metadata, standardized)
    self._convert_publication_fields(flat_metadata, standardized)
    self._convert_url_fields(flat_metadata, standardized)

    # --- path -----------------------------------------------------------
    if "pdf_path" in flat_metadata:
        path_section = standardized["path"]
        path_section["pdfs"] = [flat_metadata["pdf_path"]]
        self._add_engine_to_list(
            path_section["pdfs_engines"],
            "ScholarPDFDownloaderWithScreenshotsParallel",
        )

    return standardized
|
|
97
|
+
|
|
98
|
+
def _convert_citation_count(
|
|
99
|
+
self, flat_metadata: Dict, standardized: OrderedDict
|
|
100
|
+
) -> None:
|
|
101
|
+
"""Convert citation count fields to standardized format."""
|
|
102
|
+
if "citation_count" not in flat_metadata:
|
|
103
|
+
return
|
|
104
|
+
|
|
105
|
+
cc_value = flat_metadata["citation_count"]
|
|
106
|
+
if isinstance(cc_value, dict):
|
|
107
|
+
standardized["citation_count"]["total"] = cc_value.get("total")
|
|
108
|
+
self._add_engine_to_list(
|
|
109
|
+
standardized["citation_count"]["total_engines"],
|
|
110
|
+
cc_value.get("total_source"),
|
|
111
|
+
)
|
|
112
|
+
for year in [
|
|
113
|
+
"2025",
|
|
114
|
+
"2024",
|
|
115
|
+
"2023",
|
|
116
|
+
"2022",
|
|
117
|
+
"2021",
|
|
118
|
+
"2020",
|
|
119
|
+
"2019",
|
|
120
|
+
"2018",
|
|
121
|
+
"2017",
|
|
122
|
+
"2016",
|
|
123
|
+
"2015",
|
|
124
|
+
]:
|
|
125
|
+
if year in cc_value:
|
|
126
|
+
standardized["citation_count"][year] = cc_value[year]
|
|
127
|
+
if f"{year}_source" in cc_value:
|
|
128
|
+
self._add_engine_to_list(
|
|
129
|
+
standardized["citation_count"][f"{year}_engines"],
|
|
130
|
+
cc_value.get(f"{year}_source"),
|
|
131
|
+
)
|
|
132
|
+
else:
|
|
133
|
+
standardized["citation_count"]["total"] = cc_value
|
|
134
|
+
self._add_engine_to_list(
|
|
135
|
+
standardized["citation_count"]["total_engines"],
|
|
136
|
+
flat_metadata.get("citation_count_source"),
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
def _convert_publication_fields(
|
|
140
|
+
self, flat_metadata: Dict, standardized: OrderedDict
|
|
141
|
+
) -> None:
|
|
142
|
+
"""Convert publication fields to standardized format."""
|
|
143
|
+
if "journal" in flat_metadata:
|
|
144
|
+
standardized["publication"]["journal"] = flat_metadata["journal"]
|
|
145
|
+
self._add_engine_to_list(
|
|
146
|
+
standardized["publication"]["journal_engines"],
|
|
147
|
+
flat_metadata.get("journal_source"),
|
|
148
|
+
)
|
|
149
|
+
if "short_journal" in flat_metadata:
|
|
150
|
+
standardized["publication"]["short_journal"] = flat_metadata[
|
|
151
|
+
"short_journal"
|
|
152
|
+
]
|
|
153
|
+
if "impact_factor" in flat_metadata:
|
|
154
|
+
standardized["publication"]["impact_factor"] = flat_metadata[
|
|
155
|
+
"impact_factor"
|
|
156
|
+
]
|
|
157
|
+
if "issn" in flat_metadata:
|
|
158
|
+
standardized["publication"]["issn"] = flat_metadata["issn"]
|
|
159
|
+
if "volume" in flat_metadata:
|
|
160
|
+
standardized["publication"]["volume"] = flat_metadata["volume"]
|
|
161
|
+
if "issue" in flat_metadata:
|
|
162
|
+
standardized["publication"]["issue"] = flat_metadata["issue"]
|
|
163
|
+
if "pages" in flat_metadata:
|
|
164
|
+
pages = flat_metadata["pages"]
|
|
165
|
+
if pages and "-" in str(pages):
|
|
166
|
+
first, last = str(pages).split("-", 1)
|
|
167
|
+
standardized["publication"]["first_page"] = first.strip()
|
|
168
|
+
standardized["publication"]["last_page"] = last.strip()
|
|
169
|
+
if "publisher" in flat_metadata:
|
|
170
|
+
standardized["publication"]["publisher"] = flat_metadata["publisher"]
|
|
171
|
+
|
|
172
|
+
def _convert_url_fields(
|
|
173
|
+
self, flat_metadata: Dict, standardized: OrderedDict
|
|
174
|
+
) -> None:
|
|
175
|
+
"""Convert URL fields to standardized format."""
|
|
176
|
+
if "url_doi" in flat_metadata:
|
|
177
|
+
standardized["url"]["doi"] = flat_metadata["url_doi"]
|
|
178
|
+
if "url_publisher" in flat_metadata:
|
|
179
|
+
standardized["url"]["publisher"] = flat_metadata["url_publisher"]
|
|
180
|
+
self._add_engine_to_list(
|
|
181
|
+
standardized["url"]["publisher_engines"], "ScholarURLFinder"
|
|
182
|
+
)
|
|
183
|
+
if "url_openurl_query" in flat_metadata:
|
|
184
|
+
standardized["url"]["openurl_query"] = flat_metadata["url_openurl_query"]
|
|
185
|
+
if "url_openurl_resolved" in flat_metadata:
|
|
186
|
+
standardized["url"]["openurl_resolved"] = flat_metadata[
|
|
187
|
+
"url_openurl_resolved"
|
|
188
|
+
]
|
|
189
|
+
self._add_engine_to_list(
|
|
190
|
+
standardized["url"]["openurl_resolved_engines"], "ScholarURLFinder"
|
|
191
|
+
)
|
|
192
|
+
if "urls_pdf" in flat_metadata:
|
|
193
|
+
standardized["url"]["pdfs"] = flat_metadata["urls_pdf"]
|
|
194
|
+
self._add_engine_to_list(
|
|
195
|
+
standardized["url"]["pdfs_engines"], "ScholarURLFinder"
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
def _call_path_manager_get_storage_paths(
|
|
199
|
+
self, paper_info: Dict, collection_name: str = "MASTER"
|
|
200
|
+
) -> Dict[str, Any]:
|
|
201
|
+
"""Helper to call PathManager's get_paper_storage_paths with proper parameters."""
|
|
202
|
+
doi = paper_info.get("doi")
|
|
203
|
+
title = paper_info.get("title")
|
|
204
|
+
authors = paper_info.get("authors", [])
|
|
205
|
+
year = paper_info.get("year")
|
|
206
|
+
journal = paper_info.get("journal")
|
|
207
|
+
|
|
208
|
+
storage_path, readable_name, paper_id = (
|
|
209
|
+
self.config.path_manager.get_paper_storage_paths(
|
|
210
|
+
doi=doi,
|
|
211
|
+
title=title,
|
|
212
|
+
authors=authors,
|
|
213
|
+
year=year,
|
|
214
|
+
journal=journal,
|
|
215
|
+
project=collection_name,
|
|
216
|
+
)
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
return {
|
|
220
|
+
"storage_path": storage_path,
|
|
221
|
+
"readable_name": readable_name,
|
|
222
|
+
"unique_id": paper_id,
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# EOF
|