scitex 2.16.0__py3-none-any.whl → 2.16.2__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
Files changed (101)
  1. scitex/_mcp_tools/audio.py +11 -65
  2. scitex/audio/README.md +40 -12
  3. scitex/audio/__init__.py +27 -235
  4. scitex/audio/_audio_check.py +93 -0
  5. scitex/audio/_mcp/speak_handlers.py +56 -8
  6. scitex/audio/_speak.py +295 -0
  7. scitex/audio/mcp_server.py +98 -73
  8. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  9. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  10. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  11. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  12. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  13. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  14. scitex/social/__init__.py +1 -24
  15. scitex/writer/README.md +25 -409
  16. scitex/writer/__init__.py +98 -13
  17. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/METADATA +6 -1
  18. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/RECORD +21 -93
  19. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  20. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  21. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  22. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  23. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  24. scitex/scholar/data/.gitkeep +0 -0
  25. scitex/scholar/data/README.md +0 -44
  26. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  27. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  28. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  29. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  30. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  31. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  32. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  33. scitex/scholar/data/bib_files/pac.bib +0 -698
  34. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  35. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  36. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  37. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  38. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  39. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  40. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  41. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  42. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  43. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  44. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  45. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  46. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  47. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  48. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  49. scitex/scholar/data/impact_factor.db +0 -0
  50. scitex/writer/Writer.py +0 -487
  51. scitex/writer/_clone_writer_project.py +0 -160
  52. scitex/writer/_compile/__init__.py +0 -41
  53. scitex/writer/_compile/_compile_async.py +0 -130
  54. scitex/writer/_compile/_compile_unified.py +0 -148
  55. scitex/writer/_compile/_parser.py +0 -63
  56. scitex/writer/_compile/_runner.py +0 -457
  57. scitex/writer/_compile/_validator.py +0 -46
  58. scitex/writer/_compile/manuscript.py +0 -110
  59. scitex/writer/_compile/revision.py +0 -82
  60. scitex/writer/_compile/supplementary.py +0 -100
  61. scitex/writer/_dataclasses/__init__.py +0 -44
  62. scitex/writer/_dataclasses/config/_CONSTANTS.py +0 -46
  63. scitex/writer/_dataclasses/config/_WriterConfig.py +0 -175
  64. scitex/writer/_dataclasses/config/__init__.py +0 -9
  65. scitex/writer/_dataclasses/contents/_ManuscriptContents.py +0 -236
  66. scitex/writer/_dataclasses/contents/_RevisionContents.py +0 -136
  67. scitex/writer/_dataclasses/contents/_SupplementaryContents.py +0 -114
  68. scitex/writer/_dataclasses/contents/__init__.py +0 -9
  69. scitex/writer/_dataclasses/core/_Document.py +0 -146
  70. scitex/writer/_dataclasses/core/_DocumentSection.py +0 -546
  71. scitex/writer/_dataclasses/core/__init__.py +0 -7
  72. scitex/writer/_dataclasses/results/_CompilationResult.py +0 -165
  73. scitex/writer/_dataclasses/results/_LaTeXIssue.py +0 -102
  74. scitex/writer/_dataclasses/results/_SaveSectionsResponse.py +0 -118
  75. scitex/writer/_dataclasses/results/_SectionReadResponse.py +0 -131
  76. scitex/writer/_dataclasses/results/__init__.py +0 -11
  77. scitex/writer/_dataclasses/tree/MINIMUM_FILES.md +0 -121
  78. scitex/writer/_dataclasses/tree/_ConfigTree.py +0 -86
  79. scitex/writer/_dataclasses/tree/_ManuscriptTree.py +0 -84
  80. scitex/writer/_dataclasses/tree/_RevisionTree.py +0 -97
  81. scitex/writer/_dataclasses/tree/_ScriptsTree.py +0 -118
  82. scitex/writer/_dataclasses/tree/_SharedTree.py +0 -100
  83. scitex/writer/_dataclasses/tree/_SupplementaryTree.py +0 -101
  84. scitex/writer/_dataclasses/tree/__init__.py +0 -23
  85. scitex/writer/_mcp/__init__.py +0 -4
  86. scitex/writer/_mcp/handlers.py +0 -32
  87. scitex/writer/_mcp/tool_schemas.py +0 -33
  88. scitex/writer/_project/__init__.py +0 -29
  89. scitex/writer/_project/_create.py +0 -89
  90. scitex/writer/_project/_trees.py +0 -63
  91. scitex/writer/_project/_validate.py +0 -61
  92. scitex/writer/utils/.legacy_git_retry.py +0 -164
  93. scitex/writer/utils/__init__.py +0 -24
  94. scitex/writer/utils/_converters.py +0 -635
  95. scitex/writer/utils/_parse_latex_logs.py +0 -138
  96. scitex/writer/utils/_parse_script_args.py +0 -156
  97. scitex/writer/utils/_verify_tree_structure.py +0 -205
  98. scitex/writer/utils/_watch.py +0 -96
  99. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/WHEEL +0 -0
  100. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/entry_points.txt +0 -0
  101. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/licenses/LICENSE +0 -0
scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py ADDED
@@ -0,0 +1,344 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # Timestamp: "2025-07-29 03:10:08 (ywatanabe)"
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/_ResolverLinkFinder.py
+ # ----------------------------------------
+ from __future__ import annotations
+
+ import os
+
+ __FILE__ = (
+     "./src/scitex/scholar/open_url/_ResolverLinkFinder.py"
+ )
+ __DIR__ = os.path.dirname(__FILE__)
+ # ----------------------------------------
+
+ """Robust resolver link finder using a prioritized, multi-layered approach.
+
+ Priority order:
+ 1. Link Target (domain matching) - Most reliable
+ 2. Page Structure (CSS selectors) - Very reliable
+ 3. Text Patterns - Good fallback
+ """
+
+ import re
+ from typing import List, Optional
+ from urllib.parse import urlparse
+
+ from playwright.async_api import ElementHandle, Page
+
+ from scitex import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ class ResolverLinkFinder:
+     """Finds full-text links on resolver pages using multiple strategies."""
+
+     # DOI prefix to publisher domain mapping
+     DOI_TO_DOMAIN = {
+         "10.1038": [
+             "nature.com",
+             "springernature.com",
+         ],  # Nature Publishing Group
+         "10.1016": ["sciencedirect.com", "elsevier.com"],  # Elsevier
+         "10.1002": ["wiley.com", "onlinelibrary.wiley.com"],  # Wiley
+         "10.1007": ["springer.com", "link.springer.com"],  # Springer
+         "10.1126": ["science.org", "sciencemag.org"],  # Science/AAAS
+         "10.1021": ["acs.org", "pubs.acs.org"],  # ACS Publications
+         "10.1111": [
+             "wiley.com",
+             "onlinelibrary.wiley.com",
+         ],  # Wiley (alternative)
+         "10.1080": ["tandfonline.com"],  # Taylor & Francis
+         "10.1177": ["sagepub.com", "journals.sagepub.com"],  # SAGE
+         "10.1093": ["oup.com", "academic.oup.com"],  # Oxford
+         "10.1109": ["ieee.org", "ieeexplore.ieee.org"],  # IEEE
+         "10.1371": ["plos.org", "journals.plos.org"],  # PLOS
+         "10.1073": ["pnas.org"],  # PNAS
+         "10.1136": ["bmj.com"],  # BMJ
+         "10.3389": ["frontiersin.org"],  # Frontiers
+         "10.3390": ["mdpi.com"],  # MDPI
+     }
+
+     # Common resolver page structures
+     STRUCTURE_SELECTORS = [
+         # SFX (ExLibris) - used by many universities
+         "div#fulltext a",
+         "div.sfx-fulltext a",
+         "div.results-title > a",
+         "td.object-cell a",
+         ".getFullTxt a",
+         'div[id*="fulltext"] a',
+         'div[class*="fulltext"] a',
+         # SFX specific selectors for University of Melbourne
+         "a[title*='Wiley Online Library']",
+         "a[href*='wiley.com']",
+         "a[href*='onlinelibrary.wiley.com']",
+         ".sfx-target a",
+         ".target a",
+         "td a[href*='wiley']",
+         # Primo (ExLibris)
+         "prm-full-view-service-container a",
+         "span.availability-status-available a",
+         # Summon (ProQuest)
+         ".summon-fulltext-link",
+         "a.summon-link",
+         # EDS (EBSCO)
+         "a.fulltext-link",
+         ".ft-link a",
+         # Generic patterns
+         "a.full-text-link",
+         "a.fulltext",
+         "a#full-text-link",
+         ".access-link a",
+         ".available-link a",
+     ]
+
+     # Text patterns in priority order
+     TEXT_PATTERNS = [
+         # Most specific
+         "View full text at",
+         "Available from Nature",
+         "Available from ScienceDirect",
+         "Available from Wiley",
+         "Available from Wiley Online Library",
+         "Full text available from",
+         # Common patterns
+         "View full text",
+         "Full Text from Publisher",
+         "Get full text",
+         "Access full text",
+         "Go to article",
+         "Access article",
+         # Generic but reliable
+         "Full Text",
+         "Full text",
+         "Article",
+         "View",
+         "PDF",
+         "Download",
+     ]
+
+     def __init__(self):
+         self._doi_pattern = re.compile(r"10\.\d{4,}/[-._;()/:\w]+")
+
+     def get_expected_domains(self, doi: str) -> List[str]:
+         """Get expected publisher domains for a DOI."""
+         # Extract DOI prefix
+         match = re.match(r"(10\.\d{4,})", doi)
+         if not match:
+             return []
+
+         prefix = match.group(1)
+         return self.DOI_TO_DOMAIN.get(prefix, [])
+
+     async def find_link_async(self, page, doi: str) -> dict:
+         """Find the best full-text link using prioritized strategies."""
+         logger.info(f"Finding resolver link for DOI: {doi}")
+
+         # Strategy 1: Link Target (Most Reliable)
+         link_url = await self._find_by_domain_async(page, doi)
+         if link_url:
+             logger.info("✓ Found link using domain matching (Strategy 1)")
+             return {"success": True, "url": link_url, "method": "domain"}
+
+         # Strategy 2: Page Structure with scoring
+         link_url = await self._find_by_structure_async(page, doi)
+         if link_url:
+             logger.info("✓ Found link using page structure (Strategy 2)")
+             return {"success": True, "url": link_url, "method": "structure"}
+
+         logger.warning("✗ No suitable links found")
+         return {"success": False, "url": None, "method": None}
+
+     async def _find_by_domain_async(self, page: Page, doi: str) -> Optional[str]:
+         """Strategy 1: Find link by expected publisher domain."""
+         expected_domains = self.get_expected_domains(doi)
+         if not expected_domains:
+             logger.debug(f"No known publisher domains for DOI prefix: {doi}")
+             return None
+
+         logger.debug(f"Looking for links to domains: {expected_domains}")
+         all_links = await page.query_selector_all("a[href]")
+
+         for link in all_links:
+             href = await link.get_attribute("href")
+             if not href:
+                 continue
+
+             try:
+                 parsed = urlparse(href)
+                 domain = parsed.netloc.lower()
+
+                 for expected in expected_domains:
+                     if expected in domain:
+                         text = await link.inner_text() or ""
+                         logger.info(
+                             f"Found domain match: {domain} (text: '{text[:50]}')"
+                         )
+
+                         if not any(
+                             bad in text.lower()
+                             for bad in ["abstract", "preview", "summary"]
+                         ):
+                             return href
+                         else:
+                             logger.debug(
+                                 f"Skipping abstract/preview link: {text}"
+                             )
+             except Exception as e:
+                 logger.debug(f"Error parsing URL {href}: {e}")
+
+         return None
+
+     async def _find_by_structure_async(self, page, doi: str):
+         """Find link by page structure with publisher prioritization."""
+         potential_links = []
+         expected_domains = self.get_expected_domains(doi)
+         publisher_keywords = [
+             domain.split(".")[0] for domain in expected_domains
+         ]
+         aggregator_keywords = ["gale", "proquest", "ebsco", "jstor", "onefile"]
+
+         # Gather all possible links
+         for selector in self.STRUCTURE_SELECTORS:
+             try:
+                 elements = await page.query_selector_all(selector)
+                 logger.debug(
+                     f"Found {len(elements)} elements with selector: {selector}"
+                 )
+
+                 for element in elements:
+                     if await element.is_visible():
+                         href = await element.get_attribute("href")
+                         text = (await element.inner_text() or "").lower()
+
+                         if href and href.strip():
+                             potential_links.append(
+                                 {"href": href, "text": text, "score": 0}
+                             )
+             except Exception as element_error:
+                 logger.debug(
+                     f"Error with selector '{selector}': {element_error}"
+                 )
+
+         if not potential_links:
+             return None
+
+         # Score the links
+         for link in potential_links:
+             # Highest score for direct publisher match
+             if any(keyword in link["text"] for keyword in publisher_keywords):
+                 link["score"] = 3
+             # High score for generic publisher
+             elif "publisher" in link["text"]:
+                 link["score"] = 2
+             # Negative score for aggregators
+             elif any(
+                 keyword in link["text"] for keyword in aggregator_keywords
+             ):
+                 link["score"] = -1
+             # Default neutral score
+             else:
+                 link["score"] = 0
+
+         # Sort by score, highest first
+         sorted_links = sorted(
+             potential_links, key=lambda x: x["score"], reverse=True
+         )
+         best_link = sorted_links[0]
+
+         logger.debug(
+             f"Found structural match: '{best_link['text'][:50]}' -> {best_link['href']}"
+         )
+         return best_link["href"]
+
+     async def _find_by_text_async(self, page: Page) -> Optional[str]:
+         """Strategy 3: Find link by text patterns."""
+         for pattern in self.TEXT_PATTERNS:
+             try:
+                 selector = f'a:has-text("{pattern}")'
+                 link = await page.query_selector(selector)
+                 if link and await link.is_visible():
+                     href = await link.get_attribute("href")
+                     if href and href.strip():
+                         logger.debug(
+                             f"Found text match: '{pattern}' -> {href[:100]}"
+                         )
+                         return href
+             except Exception as e:
+                 logger.debug(f"Error with text pattern '{pattern}': {e}")
+
+         return None
+
+     async def click_and_wait_async(self, page: Page, link: ElementHandle) -> bool:
+         """Click link and wait for navigation.
+
+         Returns True if navigation succeeded.
+         """
+         initial_url = page.url
+
+         try:
+             # Get link info for logging
+             href = await link.get_attribute("href") or ""
+             text = await link.inner_text() or ""
+             logger.info(f"Clicking link: '{text[:50]}' -> {href[:100]}")
+
+             # Click and wait for navigation
+             await link.click()
+
+             # Wait for either navigation or network idle
+             try:
+                 await page.wait_for_load_state("networkidle", timeout=30000)
+             except:
+                 # Fallback to domcontentloaded if network doesn't settle
+                 await page.wait_for_load_state(
+                     "domcontentloaded", timeout=30000
+                 )
+
+             # Additional wait for JavaScript redirects
+             await page.wait_for_timeout(3000)
+
+             # Check if we navigated
+             final_url = page.url
+             if final_url != initial_url:
+                 logger.info(
+                     f"Successfully navigated: {initial_url} -> {final_url}"
+                 )
+                 return True
+             else:
+                 logger.warning("No navigation occurred after click")
+                 return False
+
+         except Exception as e:
+             logger.error(f"Error during click and navigation: {e}")
+             return False
+
+
+ # Convenience function for integration
+ async def find_and_click_resolver_link_async(page: Page, doi: str) -> Optional[str]:
+     """Find and click the best resolver link.
+
+     Args:
+         page: Playwright page object
+         doi: Target DOI
+
+     Returns:
+         Final URL after navigation, or None if failed
+     """
+     finder = ResolverLinkFinder()
+
+     # Find link
+     link = await finder.find_link_async(page, doi)
+     if not link:
+         return None
+
+     # Click and navigate
+     success = await finder.click_and_wait_async(page, link)
+     if success:
+         return page.url
+     else:
+         return None
+
+ # EOF
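
For orientation, a minimal sketch of driving the new finder from an async Playwright session. The resolver URL and DOI are placeholders, and the import is hypothetical: the wheel stages this module under a non-importable .tmp/ directory, so the path must be adjusted to wherever the module finally lands.

import asyncio

from playwright.async_api import async_playwright

from _ResolverLinkFinder import ResolverLinkFinder  # hypothetical import path


async def main() -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page()

        # Placeholder resolver URL and DOI, for illustration only.
        doi = "10.1038/s41586-021-03819-2"
        await page.goto(f"https://resolver.example.edu/openurl?doi={doi}")

        finder = ResolverLinkFinder()
        # Prefix 10.1038 maps to nature.com / springernature.com in DOI_TO_DOMAIN,
        # so Strategy 1 (domain matching) is tried first.
        result = await finder.find_link_async(page, doi)
        if result["success"]:
            await page.goto(result["url"])
            print(f"Resolved via {result['method']}: {page.url}")

        await browser.close()


asyncio.run(main())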
scitex/scholar/url_finder/.tmp/open_url/__init__.py ADDED
@@ -0,0 +1,24 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # Timestamp: "2025-07-31 00:53:24 (ywatanabe)"
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/__init__.py
+ # ----------------------------------------
+ from __future__ import annotations
+
+ import os
+
+ __FILE__ = (
+     "./src/scitex/scholar/open_url/__init__.py"
+ )
+ __DIR__ = os.path.dirname(__FILE__)
+ # ----------------------------------------
+
+ from ._DOIToURLResolver import DOIToURLResolver
+ from ._OpenURLResolver import OpenURLResolver
+
+ __all__ = [
+     "OpenURLResolver",
+     "DOIToURLResolver",
+ ]
+
+ # EOF
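
The new package surface is intentionally small: just the two resolver classes. A hedged import sketch follows; the dotted path is a guess, since .tmp is not a valid Python package name and the files will presumably be promoted out of it.

# Hypothetical import, assuming open_url is moved out of .tmp/:
from scitex.scholar.url_finder.open_url import (
    DOIToURLResolver,
    OpenURLResolver,
)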
scitex/social/__init__.py CHANGED
@@ -61,25 +61,14 @@ _os.environ.setdefault("SOCIALIA_ENV_PREFIX", "SCITEX_SOCIAL")

  # Check socialia availability
  try:
-     import socialia as _socialia
-
      # Re-export platform clients
-     from socialia import (
-         # Content strategies for MCP
+     from socialia import (  # Content strategies for MCP; Platform clients (preferred names)
          PLATFORM_STRATEGIES,
-         # Base class
-         BasePoster,
          GoogleAnalytics,
          LinkedIn,
-         LinkedInPoster,
          Reddit,
-         RedditPoster,
-         # Platform clients (preferred names)
          Twitter,
-         # Backward compatibility aliases
-         TwitterPoster,
          YouTube,
-         YouTubePoster,
      )
      from socialia import __version__ as _socialia_version

@@ -106,16 +95,11 @@ except ImportError:
          "Install with: pip install socialia"
      )

-     BasePoster = _SocialiaNotAvailable
      Twitter = _SocialiaNotAvailable
      LinkedIn = _SocialiaNotAvailable
      Reddit = _SocialiaNotAvailable
      YouTube = _SocialiaNotAvailable
      GoogleAnalytics = _SocialiaNotAvailable
-     TwitterPoster = _SocialiaNotAvailable
-     LinkedInPoster = _SocialiaNotAvailable
-     RedditPoster = _SocialiaNotAvailable
-     YouTubePoster = _SocialiaNotAvailable
      PLATFORM_STRATEGIES = ""


@@ -135,19 +119,12 @@ __all__ = [
      "SOCIALIA_AVAILABLE",
      "has_socialia",
      "__socialia_version__",
-     # Base class
-     "BasePoster",
      # Platform clients (preferred names)
      "Twitter",
      "LinkedIn",
      "Reddit",
      "YouTube",
      "GoogleAnalytics",
-     # Backward compatibility aliases
-     "TwitterPoster",
-     "LinkedInPoster",
-     "RedditPoster",
-     "YouTubePoster",
      # Content strategies
      "PLATFORM_STRATEGIES",
  ]
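
The practical upshot for downstream code: BasePoster and the *Poster backward-compatibility aliases are gone from scitex.social, so imports must move to the preferred client names. A minimal migration sketch using only names that remain exported:

# 2.16.0 and earlier (alias removed in 2.16.2):
#     from scitex.social import TwitterPoster
# 2.16.2 onwards, preferred client name only:
from scitex.social import SOCIALIA_AVAILABLE, Twitter

if not SOCIALIA_AVAILABLE:
    raise SystemExit("socialia is not installed: pip install socialia")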