scitex 2.14.0__py3-none-any.whl → 2.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. scitex/__init__.py +47 -0
  2. scitex/_env_loader.py +156 -0
  3. scitex/_mcp_resources/__init__.py +37 -0
  4. scitex/_mcp_resources/_cheatsheet.py +135 -0
  5. scitex/_mcp_resources/_figrecipe.py +138 -0
  6. scitex/_mcp_resources/_formats.py +102 -0
  7. scitex/_mcp_resources/_modules.py +337 -0
  8. scitex/_mcp_resources/_session.py +149 -0
  9. scitex/_mcp_tools/__init__.py +4 -0
  10. scitex/_mcp_tools/audio.py +66 -0
  11. scitex/_mcp_tools/diagram.py +11 -95
  12. scitex/_mcp_tools/introspect.py +191 -0
  13. scitex/_mcp_tools/plt.py +260 -305
  14. scitex/_mcp_tools/scholar.py +74 -0
  15. scitex/_mcp_tools/social.py +244 -0
  16. scitex/_mcp_tools/writer.py +21 -204
  17. scitex/ai/_gen_ai/_PARAMS.py +10 -7
  18. scitex/ai/classification/reporters/_SingleClassificationReporter.py +45 -1603
  19. scitex/ai/classification/reporters/_mixins/__init__.py +36 -0
  20. scitex/ai/classification/reporters/_mixins/_constants.py +67 -0
  21. scitex/ai/classification/reporters/_mixins/_cv_summary.py +387 -0
  22. scitex/ai/classification/reporters/_mixins/_feature_importance.py +119 -0
  23. scitex/ai/classification/reporters/_mixins/_metrics.py +275 -0
  24. scitex/ai/classification/reporters/_mixins/_plotting.py +179 -0
  25. scitex/ai/classification/reporters/_mixins/_reports.py +153 -0
  26. scitex/ai/classification/reporters/_mixins/_storage.py +160 -0
  27. scitex/audio/README.md +40 -36
  28. scitex/audio/__init__.py +127 -59
  29. scitex/audio/_branding.py +185 -0
  30. scitex/audio/_mcp/__init__.py +32 -0
  31. scitex/audio/_mcp/handlers.py +59 -6
  32. scitex/audio/_mcp/speak_handlers.py +238 -0
  33. scitex/audio/_relay.py +225 -0
  34. scitex/audio/engines/elevenlabs_engine.py +6 -1
  35. scitex/audio/mcp_server.py +228 -75
  36. scitex/canvas/README.md +1 -1
  37. scitex/canvas/editor/_dearpygui/__init__.py +25 -0
  38. scitex/canvas/editor/_dearpygui/_editor.py +147 -0
  39. scitex/canvas/editor/_dearpygui/_handlers.py +476 -0
  40. scitex/canvas/editor/_dearpygui/_panels/__init__.py +17 -0
  41. scitex/canvas/editor/_dearpygui/_panels/_control.py +119 -0
  42. scitex/canvas/editor/_dearpygui/_panels/_element_controls.py +190 -0
  43. scitex/canvas/editor/_dearpygui/_panels/_preview.py +43 -0
  44. scitex/canvas/editor/_dearpygui/_panels/_sections.py +390 -0
  45. scitex/canvas/editor/_dearpygui/_plotting.py +187 -0
  46. scitex/canvas/editor/_dearpygui/_rendering.py +504 -0
  47. scitex/canvas/editor/_dearpygui/_selection.py +295 -0
  48. scitex/canvas/editor/_dearpygui/_state.py +93 -0
  49. scitex/canvas/editor/_dearpygui/_utils.py +61 -0
  50. scitex/canvas/editor/flask_editor/templates/__init__.py +32 -70
  51. scitex/cli/__init__.py +38 -43
  52. scitex/cli/audio.py +76 -27
  53. scitex/cli/capture.py +13 -20
  54. scitex/cli/introspect.py +443 -0
  55. scitex/cli/main.py +198 -109
  56. scitex/cli/mcp.py +60 -34
  57. scitex/cli/scholar/__init__.py +8 -0
  58. scitex/cli/scholar/_crossref_scitex.py +296 -0
  59. scitex/cli/scholar/_fetch.py +25 -3
  60. scitex/cli/social.py +314 -0
  61. scitex/cli/writer.py +117 -0
  62. scitex/config/README.md +1 -1
  63. scitex/config/__init__.py +16 -2
  64. scitex/config/_env_registry.py +191 -0
  65. scitex/diagram/__init__.py +42 -19
  66. scitex/diagram/mcp_server.py +13 -125
  67. scitex/introspect/__init__.py +75 -0
  68. scitex/introspect/_call_graph.py +303 -0
  69. scitex/introspect/_class_hierarchy.py +163 -0
  70. scitex/introspect/_core.py +42 -0
  71. scitex/introspect/_docstring.py +131 -0
  72. scitex/introspect/_examples.py +113 -0
  73. scitex/introspect/_imports.py +271 -0
  74. scitex/introspect/_mcp/__init__.py +37 -0
  75. scitex/introspect/_mcp/handlers.py +208 -0
  76. scitex/introspect/_members.py +151 -0
  77. scitex/introspect/_resolve.py +89 -0
  78. scitex/introspect/_signature.py +131 -0
  79. scitex/introspect/_source.py +80 -0
  80. scitex/introspect/_type_hints.py +172 -0
  81. scitex/io/bundle/README.md +1 -1
  82. scitex/mcp_server.py +98 -5
  83. scitex/plt/__init__.py +248 -550
  84. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +5 -10
  85. scitex/plt/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  86. scitex/plt/gallery/README.md +1 -1
  87. scitex/plt/utils/_hitmap/__init__.py +82 -0
  88. scitex/plt/utils/_hitmap/_artist_extraction.py +343 -0
  89. scitex/plt/utils/_hitmap/_color_application.py +346 -0
  90. scitex/plt/utils/_hitmap/_color_conversion.py +121 -0
  91. scitex/plt/utils/_hitmap/_constants.py +40 -0
  92. scitex/plt/utils/_hitmap/_hitmap_core.py +334 -0
  93. scitex/plt/utils/_hitmap/_path_extraction.py +357 -0
  94. scitex/plt/utils/_hitmap/_query.py +113 -0
  95. scitex/plt/utils/_hitmap.py +46 -1616
  96. scitex/plt/utils/_metadata/__init__.py +80 -0
  97. scitex/plt/utils/_metadata/_artists/__init__.py +25 -0
  98. scitex/plt/utils/_metadata/_artists/_base.py +195 -0
  99. scitex/plt/utils/_metadata/_artists/_collections.py +356 -0
  100. scitex/plt/utils/_metadata/_artists/_extract.py +57 -0
  101. scitex/plt/utils/_metadata/_artists/_images.py +80 -0
  102. scitex/plt/utils/_metadata/_artists/_lines.py +261 -0
  103. scitex/plt/utils/_metadata/_artists/_patches.py +247 -0
  104. scitex/plt/utils/_metadata/_artists/_text.py +106 -0
  105. scitex/plt/utils/_metadata/_csv.py +416 -0
  106. scitex/plt/utils/_metadata/_detect.py +225 -0
  107. scitex/plt/utils/_metadata/_legend.py +127 -0
  108. scitex/plt/utils/_metadata/_rounding.py +117 -0
  109. scitex/plt/utils/_metadata/_verification.py +202 -0
  110. scitex/schema/README.md +1 -1
  111. scitex/scholar/__init__.py +8 -0
  112. scitex/scholar/_mcp/crossref_handlers.py +265 -0
  113. scitex/scholar/core/Scholar.py +63 -1700
  114. scitex/scholar/core/_mixins/__init__.py +36 -0
  115. scitex/scholar/core/_mixins/_enrichers.py +270 -0
  116. scitex/scholar/core/_mixins/_library_handlers.py +100 -0
  117. scitex/scholar/core/_mixins/_loaders.py +103 -0
  118. scitex/scholar/core/_mixins/_pdf_download.py +375 -0
  119. scitex/scholar/core/_mixins/_pipeline.py +312 -0
  120. scitex/scholar/core/_mixins/_project_handlers.py +125 -0
  121. scitex/scholar/core/_mixins/_savers.py +69 -0
  122. scitex/scholar/core/_mixins/_search.py +103 -0
  123. scitex/scholar/core/_mixins/_services.py +88 -0
  124. scitex/scholar/core/_mixins/_url_finding.py +105 -0
  125. scitex/scholar/crossref_scitex.py +367 -0
  126. scitex/scholar/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  127. scitex/scholar/examples/00_run_all.sh +120 -0
  128. scitex/scholar/jobs/_executors.py +27 -3
  129. scitex/scholar/pdf_download/ScholarPDFDownloader.py +38 -416
  130. scitex/scholar/pdf_download/_cli.py +154 -0
  131. scitex/scholar/pdf_download/strategies/__init__.py +11 -8
  132. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +80 -3
  133. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +73 -121
  134. scitex/scholar/pipelines/ScholarPipelineParallel.py +80 -138
  135. scitex/scholar/pipelines/ScholarPipelineSingle.py +43 -63
  136. scitex/scholar/pipelines/_single_steps.py +71 -36
  137. scitex/scholar/storage/_LibraryManager.py +97 -1695
  138. scitex/scholar/storage/_mixins/__init__.py +30 -0
  139. scitex/scholar/storage/_mixins/_bibtex_handlers.py +128 -0
  140. scitex/scholar/storage/_mixins/_library_operations.py +218 -0
  141. scitex/scholar/storage/_mixins/_metadata_conversion.py +226 -0
  142. scitex/scholar/storage/_mixins/_paper_saving.py +456 -0
  143. scitex/scholar/storage/_mixins/_resolution.py +376 -0
  144. scitex/scholar/storage/_mixins/_storage_helpers.py +121 -0
  145. scitex/scholar/storage/_mixins/_symlink_handlers.py +226 -0
  146. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  147. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  148. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  149. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  150. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  151. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  152. scitex/security/README.md +3 -3
  153. scitex/session/README.md +1 -1
  154. scitex/sh/README.md +1 -1
  155. scitex/social/__init__.py +153 -0
  156. scitex/social/docs/EXTERNAL_PACKAGE_BRANDING.md +149 -0
  157. scitex/template/README.md +1 -1
  158. scitex/template/clone_writer_directory.py +5 -5
  159. scitex/writer/README.md +1 -1
  160. scitex/writer/_mcp/handlers.py +11 -744
  161. scitex/writer/_mcp/tool_schemas.py +5 -335
  162. scitex-2.15.1.dist-info/METADATA +648 -0
  163. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/RECORD +166 -111
  164. scitex/canvas/editor/flask_editor/templates/_scripts.py +0 -4933
  165. scitex/canvas/editor/flask_editor/templates/_styles.py +0 -1658
  166. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  167. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  168. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  169. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  170. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  171. scitex/diagram/_compile.py +0 -312
  172. scitex/diagram/_diagram.py +0 -355
  173. scitex/diagram/_mcp/__init__.py +0 -4
  174. scitex/diagram/_mcp/handlers.py +0 -400
  175. scitex/diagram/_mcp/tool_schemas.py +0 -157
  176. scitex/diagram/_presets.py +0 -173
  177. scitex/diagram/_schema.py +0 -182
  178. scitex/diagram/_split.py +0 -278
  179. scitex/plt/_mcp/__init__.py +0 -4
  180. scitex/plt/_mcp/_handlers_annotation.py +0 -102
  181. scitex/plt/_mcp/_handlers_figure.py +0 -195
  182. scitex/plt/_mcp/_handlers_plot.py +0 -252
  183. scitex/plt/_mcp/_handlers_style.py +0 -219
  184. scitex/plt/_mcp/handlers.py +0 -74
  185. scitex/plt/_mcp/tool_schemas.py +0 -497
  186. scitex/plt/mcp_server.py +0 -231
  187. scitex/scholar/data/.gitkeep +0 -0
  188. scitex/scholar/data/README.md +0 -44
  189. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  190. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  191. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  192. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  193. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  194. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  195. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  196. scitex/scholar/data/bib_files/pac.bib +0 -698
  197. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  198. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  199. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  200. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  201. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  202. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  203. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  204. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  205. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  206. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  207. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  208. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  209. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  210. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  211. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  212. scitex/scholar/data/impact_factor.db +0 -0
  213. scitex/scholar/examples/SUGGESTIONS.md +0 -865
  214. scitex/scholar/examples/dev.py +0 -38
  215. scitex-2.14.0.dist-info/METADATA +0 -1238
  216. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/WHEEL +0 -0
  217. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/entry_points.txt +0 -0
  218. {scitex-2.14.0.dist-info → scitex-2.15.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,344 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-07-29 03:10:08 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/_ResolverLinkFinder.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+
8
+ import os
9
+
10
+ __FILE__ = (
11
+ "./src/scitex/scholar/open_url/_ResolverLinkFinder.py"
12
+ )
13
+ __DIR__ = os.path.dirname(__FILE__)
14
+ # ----------------------------------------
15
+
16
+ """Robust resolver link finder using a prioritized, multi-layered approach.
17
+
18
+ Priority order:
19
+ 1. Link Target (domain matching) - Most reliable
20
+ 2. Page Structure (CSS selectors) - Very reliable
21
+ 3. Text Patterns - Good fallback
22
+ """
23
+
24
+ import re
25
+ from typing import List, Optional
26
+ from urllib.parse import urlparse
27
+
28
+ from playwright.async_api import ElementHandle, Page
29
+
30
+ from scitex import logging
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
class ResolverLinkFinder:
    """Find full-text links on resolver pages using multiple strategies.

    Strategies are tried in priority order by ``find_link_async``:

    1. Domain matching against the publisher domains expected for the DOI.
    2. Known resolver page structures (CSS selectors) with scoring.
    3. Link text patterns, as a last-resort fallback.
    """

    # DOI prefix to publisher domain mapping
    DOI_TO_DOMAIN = {
        "10.1038": [
            "nature.com",
            "springernature.com",
        ],  # Nature Publishing Group
        "10.1016": ["sciencedirect.com", "elsevier.com"],  # Elsevier
        "10.1002": ["wiley.com", "onlinelibrary.wiley.com"],  # Wiley
        "10.1007": ["springer.com", "link.springer.com"],  # Springer
        "10.1126": ["science.org", "sciencemag.org"],  # Science/AAAS
        "10.1021": ["acs.org", "pubs.acs.org"],  # ACS Publications
        "10.1111": [
            "wiley.com",
            "onlinelibrary.wiley.com",
        ],  # Wiley (alternative)
        "10.1080": ["tandfonline.com"],  # Taylor & Francis
        "10.1177": ["sagepub.com", "journals.sagepub.com"],  # SAGE
        "10.1093": ["oup.com", "academic.oup.com"],  # Oxford
        "10.1109": ["ieee.org", "ieeexplore.ieee.org"],  # IEEE
        "10.1371": ["plos.org", "journals.plos.org"],  # PLOS
        "10.1073": ["pnas.org"],  # PNAS
        "10.1136": ["bmj.com"],  # BMJ
        "10.3389": ["frontiersin.org"],  # Frontiers
        "10.3390": ["mdpi.com"],  # MDPI
    }

    # Common resolver page structures
    STRUCTURE_SELECTORS = [
        # SFX (ExLibris) - used by many universities
        "div#fulltext a",
        "div.sfx-fulltext a",
        "div.results-title > a",
        "td.object-cell a",
        ".getFullTxt a",
        'div[id*="fulltext"] a',
        'div[class*="fulltext"] a',
        # SFX specific selectors for University of Melbourne
        "a[title*='Wiley Online Library']",
        "a[href*='wiley.com']",
        "a[href*='onlinelibrary.wiley.com']",
        ".sfx-target a",
        ".target a",
        "td a[href*='wiley']",
        # Primo (ExLibris)
        "prm-full-view-service-container a",
        "span.availability-status-available a",
        # Summon (ProQuest)
        ".summon-fulltext-link",
        "a.summon-link",
        # EDS (EBSCO)
        "a.fulltext-link",
        ".ft-link a",
        # Generic patterns
        "a.full-text-link",
        "a.fulltext",
        "a#full-text-link",
        ".access-link a",
        ".available-link a",
    ]

    # Text patterns in priority order
    TEXT_PATTERNS = [
        # Most specific
        "View full text at",
        "Available from Nature",
        "Available from ScienceDirect",
        "Available from Wiley",
        "Available from Wiley Online Library",
        "Full text available from",
        # Common patterns
        "View full text",
        "Full Text from Publisher",
        "Get full text",
        "Access full text",
        "Go to article",
        "Access article",
        # Generic but reliable
        "Full Text",
        "Full text",
        "Article",
        "View",
        "PDF",
        "Download",
    ]

    def __init__(self):
        self._doi_pattern = re.compile(r"10\.\d{4,}/[-._;()/:\w]+")

    def get_expected_domains(self, doi: str) -> List[str]:
        """Get expected publisher domains for a DOI.

        The DOI must start with its registrant prefix (``10.NNNN``); an
        unrecognised or malformed DOI yields an empty list.
        """
        # Extract DOI prefix
        match = re.match(r"(10\.\d{4,})", doi)
        if not match:
            return []

        prefix = match.group(1)
        return self.DOI_TO_DOMAIN.get(prefix, [])

    async def find_link_async(self, page, doi: str) -> dict:
        """Find the best full-text link using prioritized strategies.

        Args:
            page: Page-like object exposing ``query_selector`` and
                ``query_selector_all`` (a Playwright page in this module).
            doi: Target DOI.

        Returns:
            Dict with keys ``success`` (bool), ``url`` (the link's ``href``
            attribute, or None) and ``method`` (``"domain"``,
            ``"structure"``, ``"text"``, or None when nothing was found).
        """
        logger.info(f"Finding resolver link for DOI: {doi}")

        # Strategy 1: Link Target (Most Reliable)
        link_url = await self._find_by_domain_async(page, doi)
        if link_url:
            logger.info("✓ Found link using domain matching (Strategy 1)")
            return {"success": True, "url": link_url, "method": "domain"}

        # Strategy 2: Page Structure with scoring
        link_url = await self._find_by_structure_async(page, doi)
        if link_url:
            logger.info("✓ Found link using page structure (Strategy 2)")
            return {"success": True, "url": link_url, "method": "structure"}

        # Strategy 3: Text Patterns. Previously defined but never invoked,
        # so pages that only matched by link text always reported failure.
        link_url = await self._find_by_text_async(page)
        if link_url:
            logger.info("✓ Found link using text patterns (Strategy 3)")
            return {"success": True, "url": link_url, "method": "text"}

        logger.warning("✗ No suitable links found")
        return {"success": False, "url": None, "method": None}

    async def _find_by_domain_async(self, page: Page, doi: str) -> Optional[str]:
        """Strategy 1: Find link by expected publisher domain.

        Returns the first ``href`` whose host contains one of the expected
        publisher domains and whose link text does not look like an
        abstract/preview/summary link; None otherwise.
        """
        expected_domains = self.get_expected_domains(doi)
        if not expected_domains:
            logger.debug(f"No known publisher domains for DOI prefix: {doi}")
            return None

        logger.debug(f"Looking for links to domains: {expected_domains}")
        all_links = await page.query_selector_all("a[href]")

        for link in all_links:
            href = await link.get_attribute("href")
            if not href:
                continue

            try:
                parsed = urlparse(href)
                domain = parsed.netloc.lower()

                for expected in expected_domains:
                    # Substring match on the host, so subdomains of the
                    # publisher domain are accepted as well.
                    if expected in domain:
                        text = await link.inner_text() or ""
                        logger.info(
                            f"Found domain match: {domain} (text: '{text[:50]}')"
                        )

                        if not any(
                            bad in text.lower()
                            for bad in ["abstract", "preview", "summary"]
                        ):
                            return href
                        else:
                            logger.debug(
                                f"Skipping abstract/preview link: {text}"
                            )
            except Exception as e:
                # Best effort: one bad anchor must not abort the scan.
                logger.debug(f"Error parsing URL {href}: {e}")

        return None

    async def _find_by_structure_async(self, page: Page, doi: str) -> Optional[str]:
        """Strategy 2: Find link by page structure with publisher prioritization.

        Collects visible anchors matching ``STRUCTURE_SELECTORS``, scores
        them by their (lower-cased) link text, and returns the ``href`` of
        the best-scoring candidate, or None when no candidate exists.
        """
        potential_links = []
        expected_domains = self.get_expected_domains(doi)
        # First label of each expected domain, e.g. "nature" for "nature.com".
        publisher_keywords = [
            domain.split(".")[0] for domain in expected_domains
        ]
        aggregator_keywords = ["gale", "proquest", "ebsco", "jstor", "onefile"]

        # Gather all possible links
        for selector in self.STRUCTURE_SELECTORS:
            try:
                elements = await page.query_selector_all(selector)
                logger.debug(
                    f"Found {len(elements)} elements with selector: {selector}"
                )

                for element in elements:
                    if await element.is_visible():
                        href = await element.get_attribute("href")
                        text = (await element.inner_text() or "").lower()

                        if href and href.strip():
                            potential_links.append(
                                {"href": href, "text": text, "score": 0}
                            )
            except Exception as element_error:
                logger.debug(
                    f"Error with selector '{selector}': {element_error}"
                )

        if not potential_links:
            return None

        # Score the links
        for link in potential_links:
            # Highest score for direct publisher match
            if any(keyword in link["text"] for keyword in publisher_keywords):
                link["score"] = 3
            # High score for generic publisher
            elif "publisher" in link["text"]:
                link["score"] = 2
            # Negative score for aggregators
            elif any(
                keyword in link["text"] for keyword in aggregator_keywords
            ):
                link["score"] = -1
            # Default neutral score
            else:
                link["score"] = 0

        # Highest score wins; max() keeps the first candidate on ties, the
        # same pick a stable descending sort would put first.
        best_link = max(potential_links, key=lambda x: x["score"])

        logger.debug(
            f"Found structural match: '{best_link['text'][:50]}' -> {best_link['href']}"
        )
        return best_link["href"]

    async def _find_by_text_async(self, page: Page) -> Optional[str]:
        """Strategy 3: Find link by text patterns.

        Tries ``TEXT_PATTERNS`` in order and returns the ``href`` of the
        first visible anchor containing a pattern, or None.
        """
        for pattern in self.TEXT_PATTERNS:
            try:
                selector = f'a:has-text("{pattern}")'
                link = await page.query_selector(selector)
                if link and await link.is_visible():
                    href = await link.get_attribute("href")
                    if href and href.strip():
                        logger.debug(
                            f"Found text match: '{pattern}' -> {href[:100]}"
                        )
                        return href
            except Exception as e:
                logger.debug(f"Error with text pattern '{pattern}': {e}")

        return None

    async def click_and_wait_async(self, page: Page, link: ElementHandle) -> bool:
        """Click link and wait for navigation.

        Returns True if ``page.url`` changed after the click; False when it
        did not change or when any step raised.
        """
        initial_url = page.url

        try:
            # Get link info for logging
            href = await link.get_attribute("href") or ""
            text = await link.inner_text() or ""
            logger.info(f"Clicking link: '{text[:50]}' -> {href[:100]}")

            # Click and wait for navigation
            await link.click()

            # Wait for either navigation or network idle
            try:
                await page.wait_for_load_state("networkidle", timeout=30000)
            except Exception:
                # Fallback to domcontentloaded if network doesn't settle.
                # Not a bare ``except`` so that task cancellation and
                # KeyboardInterrupt still propagate.
                await page.wait_for_load_state(
                    "domcontentloaded", timeout=30000
                )

            # Additional wait for JavaScript redirects
            await page.wait_for_timeout(3000)

            # Check if we navigated
            final_url = page.url
            if final_url != initial_url:
                logger.info(
                    f"Successfully navigated: {initial_url} -> {final_url}"
                )
                return True
            else:
                logger.warning("No navigation occurred after click")
                return False

        except Exception as e:
            logger.error(f"Error during click and navigation: {e}")
            return False
317
+
318
+
319
+ # Convenience function for integration
320
async def find_and_click_resolver_link_async(page: Page, doi: str) -> Optional[str]:
    """Find and click the best resolver link.

    ``ResolverLinkFinder.find_link_async`` returns a result dict carrying the
    link's ``href`` string, not an element handle. The dict is always truthy,
    so it must be inspected via its ``success``/``url`` keys, and the anchor
    element has to be looked up again before it can be clicked.

    Args:
        page: Playwright page object
        doi: Target DOI

    Returns:
        Final URL after navigation, or None if failed
    """
    finder = ResolverLinkFinder()

    # Find link
    result = await finder.find_link_async(page, doi)
    target_href = result.get("url") if result.get("success") else None
    if not target_href:
        return None

    # Recover the element whose href attribute is the one that was reported.
    # Comparing attribute values avoids building a CSS selector from an
    # arbitrary URL that may contain quotes or other special characters.
    for anchor in await page.query_selector_all("a[href]"):
        if await anchor.get_attribute("href") != target_href:
            continue
        if not await anchor.is_visible():
            continue

        # Click and navigate
        if await finder.click_and_wait_async(page, anchor):
            return page.url
        return None

    return None
343
+
344
+ # EOF
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-07-31 00:53:24 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/__init__.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+
8
+ import os
9
+
10
+ __FILE__ = (
11
+ "./src/scitex/scholar/open_url/__init__.py"
12
+ )
13
+ __DIR__ = os.path.dirname(__FILE__)
14
+ # ----------------------------------------
15
+
16
+ from ._DOIToURLResolver import DOIToURLResolver
17
+ from ._OpenURLResolver import OpenURLResolver
18
+
19
+ __all__ = [
20
+ "OpenURLResolver",
21
+ "DOIToURLResolver",
22
+ ]
23
+
24
+ # EOF
scitex/security/README.md CHANGED
@@ -14,7 +14,7 @@ Reusable security utilities for the SciTeX ecosystem. Works everywhere: local, c
14
14
  The module is part of the `scitex` package (editable install):
15
15
 
16
16
  ```bash
17
- cd ~/proj/scitex-code
17
+ cd ~/proj/scitex-python
18
18
  pip install -e .
19
19
  ```
20
20
 
@@ -162,7 +162,7 @@ Total open alerts: 2
162
162
  ```yaml
163
163
  - name: Check Security
164
164
  run: |
165
- pip install -e ~/proj/scitex-code
165
+ pip install -e ~/proj/scitex-python
166
166
  python -m scitex.security.cli check --save
167
167
  ```
168
168
 
@@ -245,7 +245,7 @@ Get path to latest alerts file.
245
245
  ## Testing
246
246
 
247
247
  ```bash
248
- cd ~/proj/scitex-code
248
+ cd ~/proj/scitex-python
249
249
  pytest tests/test_security.py
250
250
  ```
251
251
 
scitex/session/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  <!-- ---
2
2
  !-- Timestamp: 2025-11-18 10:14:48
3
3
  !-- Author: ywatanabe
4
- !-- File: /home/ywatanabe/proj/scitex-code/src/scitex/session/README.md
4
+ !-- File: /home/ywatanabe/proj/scitex-python/src/scitex/session/README.md
5
5
  !-- --- -->
6
6
 
7
7
  # scitex.session
scitex/sh/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  <!-- ---
2
2
  !-- Timestamp: 2025-10-29 07:23:56
3
3
  !-- Author: ywatanabe
4
- !-- File: /home/ywatanabe/proj/scitex-code/src/scitex/sh/README.md
4
+ !-- File: /home/ywatanabe/proj/scitex-python/src/scitex/sh/README.md
5
5
  !-- --- -->
6
6
 
7
7
  # scitex.sh - Shell Command Execution Module
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: 2026-01-22
3
+ # File: /home/ywatanabe/proj/scitex-code/src/scitex/social/__init__.py
4
+
5
+ """SciTeX Social - Unified social media management.
6
+
7
+ This module provides a thin wrapper around socialia, the core social media
8
+ integration package. It uses scitex branding and environment variable prefixes.
9
+
10
+ Features
11
+ --------
12
+ - Twitter/X posting and analytics
13
+ - LinkedIn posting
14
+ - Reddit posting
15
+ - YouTube analytics
16
+ - Google Analytics integration
17
+
18
+ Environment Variables
19
+ ---------------------
20
+ Credentials use SCITEX_ prefix (falls back to SOCIALIA_):
21
+ - SCITEX_X_CONSUMER_KEY, SCITEX_X_CONSUMER_SECRET
22
+ - SCITEX_X_ACCESS_TOKEN, SCITEX_X_ACCESS_TOKEN_SECRET
23
+ - SCITEX_LINKEDIN_ACCESS_TOKEN
24
+ - SCITEX_REDDIT_CLIENT_ID, SCITEX_REDDIT_CLIENT_SECRET
25
+ - SCITEX_YOUTUBE_API_KEY
26
+ - SCITEX_GA_PROPERTY_ID
27
+
28
+ Usage
29
+ -----
30
+ import scitex as stx
31
+
32
+ # Twitter/X
33
+ x = stx.social.Twitter()
34
+ x.post("Hello from SciTeX!")
35
+
36
+ # LinkedIn
37
+ linkedin = stx.social.LinkedIn()
38
+ linkedin.post("Research update", visibility="public")
39
+
40
+ # YouTube analytics
41
+ yt = stx.social.YouTube()
42
+ stats = yt.get_channel_stats()
43
+
44
+ # Google Analytics
45
+ ga = stx.social.GoogleAnalytics()
46
+ report = ga.get_report(start_date="7daysAgo")
47
+
48
+ See Also
49
+ --------
50
+ - socialia: https://github.com/ywatanabe1989/socialia
51
+ - scitex: https://scitex.ai
52
+ """
53
+
54
+ import os as _os
55
+
56
+ # Set branding BEFORE importing socialia
57
+ _os.environ.setdefault("SOCIALIA_BRAND", "scitex.social")
58
+ _os.environ.setdefault("SOCIALIA_ENV_PREFIX", "SCITEX")
59
+
60
+ # Check socialia availability
61
try:
    import socialia as _socialia

    # Re-export the socialia public API under scitex.social.
    from socialia import (
        # Content strategies for MCP
        PLATFORM_STRATEGIES,
        # Base class
        BasePoster,
        GoogleAnalytics,
        LinkedIn,
        LinkedInPoster,
        Reddit,
        RedditPoster,
        # Platform clients (preferred names)
        Twitter,
        # Backward compatibility aliases
        TwitterPoster,
        YouTube,
        YouTubePoster,
    )
    from socialia import __version__ as _socialia_version

except ImportError:
    SOCIALIA_AVAILABLE = False
    __socialia_version__ = None

    class _SocialiaNotAvailable:
        """Stand-in bound to every client name when socialia is missing.

        Instantiating it raises ImportError with an install hint.
        """

        # Shared text for both raise sites.
        _HINT = (
            "socialia is required for scitex.social. "
            "Install with: pip install socialia"
        )

        def __init__(self, *args, **kwargs):
            raise ImportError(self._HINT)

        def __getattr__(self, name):
            raise ImportError(self._HINT)

    # Every client name and legacy alias points at the same placeholder.
    BasePoster = Twitter = LinkedIn = Reddit = YouTube = GoogleAnalytics = (
        _SocialiaNotAvailable
    )
    TwitterPoster = LinkedInPoster = RedditPoster = YouTubePoster = (
        _SocialiaNotAvailable
    )
    PLATFORM_STRATEGIES = ""

else:
    # All imports succeeded: record availability and the backend version.
    SOCIALIA_AVAILABLE = True
    __socialia_version__ = _socialia_version
118
+
119
+
120
def has_socialia() -> bool:
    """Report whether the ``socialia`` backend is usable.

    Returns
    -------
    bool
        The value of the module-level ``SOCIALIA_AVAILABLE`` flag.
    """
    available = SOCIALIA_AVAILABLE
    return available
129
+
130
+
131
+ __all__ = [
132
+ # Availability check
133
+ "SOCIALIA_AVAILABLE",
134
+ "has_socialia",
135
+ "__socialia_version__",
136
+ # Base class
137
+ "BasePoster",
138
+ # Platform clients (preferred names)
139
+ "Twitter",
140
+ "LinkedIn",
141
+ "Reddit",
142
+ "YouTube",
143
+ "GoogleAnalytics",
144
+ # Backward compatibility aliases
145
+ "TwitterPoster",
146
+ "LinkedInPoster",
147
+ "RedditPoster",
148
+ "YouTubePoster",
149
+ # Content strategies
150
+ "PLATFORM_STRATEGIES",
151
+ ]
152
+
153
+ # EOF