scitex 2.17.0__py3-none-any.whl → 2.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/_dev/__init__.py +122 -0
- scitex/_dev/_config.py +391 -0
- scitex/_dev/_dashboard/__init__.py +11 -0
- scitex/_dev/_dashboard/_app.py +89 -0
- scitex/_dev/_dashboard/_routes.py +169 -0
- scitex/_dev/_dashboard/_scripts.py +301 -0
- scitex/_dev/_dashboard/_styles.py +205 -0
- scitex/_dev/_dashboard/_templates.py +117 -0
- scitex/_dev/_dashboard/static/version-dashboard-favicon.svg +12 -0
- scitex/_dev/_ecosystem.py +109 -0
- scitex/_dev/_github.py +360 -0
- scitex/_dev/_mcp/__init__.py +11 -0
- scitex/_dev/_mcp/handlers.py +182 -0
- scitex/_dev/_ssh.py +332 -0
- scitex/_dev/_versions.py +272 -0
- scitex/_mcp_tools/__init__.py +2 -0
- scitex/_mcp_tools/dev.py +186 -0
- scitex/audio/_audio_check.py +84 -41
- scitex/cli/capture.py +45 -22
- scitex/cli/dev.py +494 -0
- scitex/cli/main.py +2 -0
- scitex/cli/stats.py +48 -20
- scitex/cli/verify.py +33 -36
- scitex/plt/__init__.py +16 -6
- scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
- scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
- scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
- scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
- scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
- scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
- scitex/template/__init__.py +18 -1
- scitex/template/clone_research_minimal.py +111 -0
- scitex/verify/README.md +0 -12
- scitex/verify/__init__.py +0 -4
- scitex/verify/_visualize.py +0 -4
- scitex/verify/_viz/__init__.py +0 -18
- {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/METADATA +2 -1
- {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/RECORD +41 -49
- scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
- scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
- scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
- scitex/scholar/data/.gitkeep +0 -0
- scitex/scholar/data/README.md +0 -44
- scitex/scholar/data/bib_files/bibliography.bib +0 -1952
- scitex/scholar/data/bib_files/neurovista.bib +0 -277
- scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
- scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
- scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
- scitex/scholar/data/bib_files/openaccess.bib +0 -89
- scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
- scitex/scholar/data/bib_files/pac.bib +0 -698
- scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
- scitex/scholar/data/bib_files/pac_processed.bib +0 -0
- scitex/scholar/data/bib_files/pac_titles.txt +0 -75
- scitex/scholar/data/bib_files/paywalled.bib +0 -98
- scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
- scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
- scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
- scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
- scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
- scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
- scitex/scholar/data/bib_files/test_seizure.bib +0 -46
- scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
- scitex/scholar/data/impact_factor.db +0 -0
- scitex/verify/_viz/_plotly.py +0 -193
- {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/WHEEL +0 -0
- {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/entry_points.txt +0 -0
- {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-07-29 03:10:08 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/_ResolverLinkFinder.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
__FILE__ = (
|
|
11
|
+
"./src/scitex/scholar/open_url/_ResolverLinkFinder.py"
|
|
12
|
+
)
|
|
13
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
14
|
+
# ----------------------------------------
|
|
15
|
+
|
|
16
|
+
"""Robust resolver link finder using a prioritized, multi-layered approach.
|
|
17
|
+
|
|
18
|
+
Priority order:
|
|
19
|
+
1. Link Target (domain matching) - Most reliable
|
|
20
|
+
2. Page Structure (CSS selectors) - Very reliable
|
|
21
|
+
3. Text Patterns - Good fallback
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import re
|
|
25
|
+
from typing import List, Optional
|
|
26
|
+
from urllib.parse import urlparse
|
|
27
|
+
|
|
28
|
+
from playwright.async_api import ElementHandle, Page
|
|
29
|
+
|
|
30
|
+
from scitex import logging
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ResolverLinkFinder:
    """Finds full-text links on resolver pages using multiple strategies.

    Strategies are tried in priority order:
        1. Link target (publisher-domain matching) - most reliable.
        2. Page structure (CSS selectors with publisher-aware scoring).
        3. Text patterns - generic fallback.
    """

    # DOI prefix to publisher domain mapping
    DOI_TO_DOMAIN = {
        "10.1038": [
            "nature.com",
            "springernature.com",
        ],  # Nature Publishing Group
        "10.1016": ["sciencedirect.com", "elsevier.com"],  # Elsevier
        "10.1002": ["wiley.com", "onlinelibrary.wiley.com"],  # Wiley
        "10.1007": ["springer.com", "link.springer.com"],  # Springer
        "10.1126": ["science.org", "sciencemag.org"],  # Science/AAAS
        "10.1021": ["acs.org", "pubs.acs.org"],  # ACS Publications
        "10.1111": [
            "wiley.com",
            "onlinelibrary.wiley.com",
        ],  # Wiley (alternative)
        "10.1080": ["tandfonline.com"],  # Taylor & Francis
        "10.1177": ["sagepub.com", "journals.sagepub.com"],  # SAGE
        "10.1093": ["oup.com", "academic.oup.com"],  # Oxford
        "10.1109": ["ieee.org", "ieeexplore.ieee.org"],  # IEEE
        "10.1371": ["plos.org", "journals.plos.org"],  # PLOS
        "10.1073": ["pnas.org"],  # PNAS
        "10.1136": ["bmj.com"],  # BMJ
        "10.3389": ["frontiersin.org"],  # Frontiers
        "10.3390": ["mdpi.com"],  # MDPI
    }

    # Common resolver page structures
    STRUCTURE_SELECTORS = [
        # SFX (ExLibris) - used by many universities
        "div#fulltext a",
        "div.sfx-fulltext a",
        "div.results-title > a",
        "td.object-cell a",
        ".getFullTxt a",
        'div[id*="fulltext"] a',
        'div[class*="fulltext"] a',
        # SFX specific selectors for University of Melbourne
        "a[title*='Wiley Online Library']",
        "a[href*='wiley.com']",
        "a[href*='onlinelibrary.wiley.com']",
        ".sfx-target a",
        ".target a",
        "td a[href*='wiley']",
        # Primo (ExLibris)
        "prm-full-view-service-container a",
        "span.availability-status-available a",
        # Summon (ProQuest)
        ".summon-fulltext-link",
        "a.summon-link",
        # EDS (EBSCO)
        "a.fulltext-link",
        ".ft-link a",
        # Generic patterns
        "a.full-text-link",
        "a.fulltext",
        "a#full-text-link",
        ".access-link a",
        ".available-link a",
    ]

    # Text patterns in priority order (most specific first)
    TEXT_PATTERNS = [
        # Most specific
        "View full text at",
        "Available from Nature",
        "Available from ScienceDirect",
        "Available from Wiley",
        "Available from Wiley Online Library",
        "Full text available from",
        # Common patterns
        "View full text",
        "Full Text from Publisher",
        "Get full text",
        "Access full text",
        "Go to article",
        "Access article",
        # Generic but reliable
        "Full Text",
        "Full text",
        "Article",
        "View",
        "PDF",
        "Download",
    ]

    def __init__(self):
        # Full-DOI pattern; kept for external callers even though the
        # strategies below only need the prefix match in
        # get_expected_domains().
        self._doi_pattern = re.compile(r"10\.\d{4,}/[-._;()/:\w]+")

    def get_expected_domains(self, doi: str) -> List[str]:
        """Return the known publisher domains for the DOI's prefix.

        Returns an empty list when the string is not a DOI or the
        prefix is not in DOI_TO_DOMAIN.
        """
        # Extract DOI prefix (e.g. "10.1038")
        match = re.match(r"(10\.\d{4,})", doi)
        if not match:
            return []

        prefix = match.group(1)
        return self.DOI_TO_DOMAIN.get(prefix, [])

    async def find_link_async(self, page, doi: str) -> dict:
        """Find the best full-text link using prioritized strategies.

        Returns a dict ``{"success": bool, "url": Optional[str],
        "method": Optional[str]}`` where ``method`` is one of
        ``"domain"``, ``"structure"`` or ``"text"``.
        """
        logger.info(f"Finding resolver link for DOI: {doi}")

        # Strategy 1: Link Target (Most Reliable)
        link_url = await self._find_by_domain_async(page, doi)
        if link_url:
            logger.info("✓ Found link using domain matching (Strategy 1)")
            return {"success": True, "url": link_url, "method": "domain"}

        # Strategy 2: Page Structure with scoring
        link_url = await self._find_by_structure_async(page, doi)
        if link_url:
            logger.info("✓ Found link using page structure (Strategy 2)")
            return {"success": True, "url": link_url, "method": "structure"}

        # Strategy 3: Text patterns. BUGFIX: this fallback was documented
        # and implemented (_find_by_text_async) but never invoked.
        link_url = await self._find_by_text_async(page)
        if link_url:
            logger.info("✓ Found link using text patterns (Strategy 3)")
            return {"success": True, "url": link_url, "method": "text"}

        logger.warning("✗ No suitable links found")
        return {"success": False, "url": None, "method": None}

    async def _find_by_domain_async(self, page: Page, doi: str) -> Optional[str]:
        """Strategy 1: Find link by expected publisher domain.

        Scans every anchor on the page and returns the first href whose
        netloc contains a known publisher domain for this DOI prefix,
        skipping links whose text suggests abstract/preview-only access.
        """
        expected_domains = self.get_expected_domains(doi)
        if not expected_domains:
            logger.debug(f"No known publisher domains for DOI prefix: {doi}")
            return None

        logger.debug(f"Looking for links to domains: {expected_domains}")
        all_links = await page.query_selector_all("a[href]")

        for link in all_links:
            href = await link.get_attribute("href")
            if not href:
                continue

            try:
                parsed = urlparse(href)
                domain = parsed.netloc.lower()

                for expected in expected_domains:
                    if expected in domain:
                        text = await link.inner_text() or ""
                        logger.info(
                            f"Found domain match: {domain} (text: '{text[:50]}')"
                        )

                        # Reject links that only lead to an abstract or
                        # preview rather than the full text.
                        if not any(
                            bad in text.lower()
                            for bad in ["abstract", "preview", "summary"]
                        ):
                            return href
                        else:
                            logger.debug(
                                f"Skipping abstract/preview link: {text}"
                            )
            except Exception as e:
                logger.debug(f"Error parsing URL {href}: {e}")

        return None

    async def _find_by_structure_async(self, page, doi: str):
        """Strategy 2: Find link by page structure with publisher scoring.

        Collects visible links matching known resolver selectors, scores
        them (publisher match > generic "publisher" > neutral >
        aggregator), and returns the highest-scoring href, or None.
        """
        potential_links = []
        expected_domains = self.get_expected_domains(doi)
        # e.g. "nature.com" -> "nature"; used for cheap text matching.
        publisher_keywords = [
            domain.split(".")[0] for domain in expected_domains
        ]
        aggregator_keywords = ["gale", "proquest", "ebsco", "jstor", "onefile"]

        # Gather all possible links
        for selector in self.STRUCTURE_SELECTORS:
            try:
                elements = await page.query_selector_all(selector)
                logger.debug(
                    f"Found {len(elements)} elements with selector: {selector}"
                )

                for element in elements:
                    if await element.is_visible():
                        href = await element.get_attribute("href")
                        text = (await element.inner_text() or "").lower()

                        if href and href.strip():
                            potential_links.append(
                                {"href": href, "text": text, "score": 0}
                            )
            except Exception as element_error:
                logger.debug(
                    f"Error with selector '{selector}': {element_error}"
                )

        if not potential_links:
            return None

        # Score the links
        for link in potential_links:
            # Highest score for direct publisher match
            if any(keyword in link["text"] for keyword in publisher_keywords):
                link["score"] = 3
            # High score for generic publisher
            elif "publisher" in link["text"]:
                link["score"] = 2
            # Negative score for aggregators
            elif any(
                keyword in link["text"] for keyword in aggregator_keywords
            ):
                link["score"] = -1
            # Default neutral score
            else:
                link["score"] = 0

        # Sort by score, highest first
        sorted_links = sorted(
            potential_links, key=lambda x: x["score"], reverse=True
        )
        best_link = sorted_links[0]

        logger.debug(
            f"Found structural match: '{best_link['text'][:50]}' -> {best_link['href']}"
        )
        return best_link["href"]

    async def _find_by_text_async(self, page: Page) -> Optional[str]:
        """Strategy 3: Find link by text patterns.

        Tries each TEXT_PATTERNS entry (most specific first) against
        visible anchors and returns the first non-empty href found.
        """
        for pattern in self.TEXT_PATTERNS:
            try:
                selector = f'a:has-text("{pattern}")'
                link = await page.query_selector(selector)
                if link and await link.is_visible():
                    href = await link.get_attribute("href")
                    if href and href.strip():
                        logger.debug(
                            f"Found text match: '{pattern}' -> {href[:100]}"
                        )
                        return href
            except Exception as e:
                logger.debug(f"Error with text pattern '{pattern}': {e}")

        return None

    async def click_and_wait_async(self, page: Page, link: ElementHandle) -> bool:
        """Click link and wait for navigation.

        Returns True if the page URL changed after the click.
        """
        initial_url = page.url

        try:
            # Get link info for logging
            href = await link.get_attribute("href") or ""
            text = await link.inner_text() or ""
            logger.info(f"Clicking link: '{text[:50]}' -> {href[:100]}")

            # Click and wait for navigation
            await link.click()

            # Wait for either navigation or network idle
            try:
                await page.wait_for_load_state("networkidle", timeout=30000)
            except Exception:
                # BUGFIX: was a bare `except:` (also caught SystemExit /
                # KeyboardInterrupt). Fallback to domcontentloaded if the
                # network doesn't settle.
                await page.wait_for_load_state(
                    "domcontentloaded", timeout=30000
                )

            # Additional wait for JavaScript redirects
            await page.wait_for_timeout(3000)

            # Check if we navigated
            final_url = page.url
            if final_url != initial_url:
                logger.info(
                    f"Successfully navigated: {initial_url} -> {final_url}"
                )
                return True
            else:
                logger.warning("No navigation occurred after click")
                return False

        except Exception as e:
            logger.error(f"Error during click and navigation: {e}")
            return False
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
# Convenience function for integration
async def find_and_click_resolver_link_async(page: Page, doi: str) -> Optional[str]:
    """Find the best resolver link and navigate to it.

    Args:
        page: Playwright page object
        doi: Target DOI

    Returns:
        Final URL after navigation, or None if failed
    """
    finder = ResolverLinkFinder()

    # BUGFIX: find_link_async returns a result dict
    # ({"success": bool, "url": Optional[str], "method": Optional[str]}),
    # not an ElementHandle. The previous code passed that dict to
    # click_and_wait_async (which calls .get_attribute on it -> would
    # raise AttributeError), and `if not link:` never triggered because
    # a non-empty dict is always truthy.
    result = await finder.find_link_async(page, doi)
    if not result.get("success") or not result.get("url"):
        return None

    # We have a URL (not a clickable element), so navigate directly.
    try:
        await page.goto(result["url"], wait_until="domcontentloaded", timeout=30000)
    except Exception as e:
        logger.error(f"Error during click and navigation: {e}")
        return None

    return page.url
|
|
343
|
+
|
|
344
|
+
# EOF
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-07-31 00:53:24 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/__init__.py
# ----------------------------------------
"""Public package interface for scitex.scholar.open_url.

Re-exports the two resolver entry points so callers can write
``from scitex.scholar.open_url import OpenURLResolver, DOIToURLResolver``.
"""
from __future__ import annotations

import os

# Source-relative path recorded for tooling/debugging.
__FILE__ = (
    "./src/scitex/scholar/open_url/__init__.py"
)
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------

from ._DOIToURLResolver import DOIToURLResolver
from ._OpenURLResolver import OpenURLResolver

# Explicit public API of this package.
__all__ = [
    "OpenURLResolver",
    "DOIToURLResolver",
]

# EOF
|
scitex/template/__init__.py
CHANGED
|
@@ -22,6 +22,7 @@ from .clone_pip_project import TEMPLATE_REPO_URL as PIP_PROJECT_URL
|
|
|
22
22
|
from .clone_pip_project import clone_pip_project
|
|
23
23
|
from .clone_research import TEMPLATE_REPO_URL as RESEARCH_URL
|
|
24
24
|
from .clone_research import clone_research
|
|
25
|
+
from .clone_research_minimal import clone_research_minimal
|
|
25
26
|
from .clone_singularity import TEMPLATE_REPO_URL as SINGULARITY_URL
|
|
26
27
|
from .clone_singularity import clone_singularity
|
|
27
28
|
from .clone_writer_directory import (
|
|
@@ -54,9 +55,24 @@ def get_available_templates_info():
|
|
|
54
55
|
... print(f"{template['name']}: {template['description']}")
|
|
55
56
|
"""
|
|
56
57
|
return [
|
|
58
|
+
{
|
|
59
|
+
"id": "research_minimal",
|
|
60
|
+
"name": "Research Minimal",
|
|
61
|
+
"description": "Minimal SciTeX structure with writer, scholar, visualizer, and console",
|
|
62
|
+
"github_url": RESEARCH_URL,
|
|
63
|
+
"branch": "minimal",
|
|
64
|
+
"use_case": "Focused research workflow with essential SciTeX modules only",
|
|
65
|
+
"features": [
|
|
66
|
+
"scitex/writer/ - LaTeX manuscript writing",
|
|
67
|
+
"scitex/scholar/ - Bibliography management",
|
|
68
|
+
"scitex/visualizer/ - Figure creation",
|
|
69
|
+
"scitex/console/ - Code execution",
|
|
70
|
+
"scitex/management/ - Project management",
|
|
71
|
+
],
|
|
72
|
+
},
|
|
57
73
|
{
|
|
58
74
|
"id": "research",
|
|
59
|
-
"name": "Research Project",
|
|
75
|
+
"name": "Research Project (Full)",
|
|
60
76
|
"description": "Full scientific workflow structure for research projects",
|
|
61
77
|
"github_url": RESEARCH_URL,
|
|
62
78
|
"use_case": "Scientific research with data analysis, experiments, and paper writing",
|
|
@@ -113,6 +129,7 @@ def get_available_templates_info():
|
|
|
113
129
|
|
|
114
130
|
__all__ = [
|
|
115
131
|
"clone_research",
|
|
132
|
+
"clone_research_minimal",
|
|
116
133
|
"clone_pip_project",
|
|
117
134
|
"clone_singularity",
|
|
118
135
|
"clone_writer_directory",
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# File: /home/ywatanabe/proj/scitex-code/src/scitex/template/clone_research_minimal.py
|
|
3
|
+
# ----------------------------------------
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
__FILE__ = "./src/scitex/template/clone_research_minimal.py"
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
Create a new minimal research project from the scitex_template_research template.
|
|
14
|
+
|
|
15
|
+
Uses the 'minimal' branch which contains only the scitex/ directory with:
|
|
16
|
+
- writer/ - LaTeX manuscript writing
|
|
17
|
+
- scholar/ - Bibliography management
|
|
18
|
+
- visualizer/ - Figure creation
|
|
19
|
+
- console/ - Code execution
|
|
20
|
+
- management/ - Project management
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import sys
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
from ._clone_project import clone_project
|
|
27
|
+
|
|
28
|
+
TEMPLATE_REPO_URL = "https://github.com/ywatanabe1989/scitex-minimal-template.git"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def clone_research_minimal(
    project_dir: str,
    git_strategy: Optional[str] = "child",
    branch: Optional[str] = None,
    tag: Optional[str] = None,
) -> bool:
    """Clone the scitex-minimal-template into a new project directory.

    The minimal template ships only the essential ``scitex/`` modules:
    writer/ (LaTeX manuscript writing with compilation scripts),
    scholar/ (bibliography management), visualizer/ (figure creation)
    and console/ (code execution).

    Parameters
    ----------
    project_dir : str
        Destination directory (created by the clone). Accepts a bare
        name such as ``"my_project"`` or a path such as
        ``"./projects/my_project"``.
    git_strategy : str, optional
        Git initialization strategy ('child', 'parent', None).
        Default is 'child'.
    branch : str, optional
        Template branch to clone, if any.
    tag : str, optional
        Template tag/release to clone, if any.

    Returns
    -------
    bool
        True on success, False otherwise.

    Example
    -------
    >>> from scitex.template import clone_research_minimal
    >>> clone_research_minimal("my_research_project")
    >>> clone_research_minimal("./projects/my_project")
    """
    # Delegate all of the heavy lifting to the shared clone helper.
    template_name = "scitex-minimal-template"
    return clone_project(
        project_dir,
        TEMPLATE_REPO_URL,
        template_name,
        git_strategy,
        branch=branch,
        tag=tag,
    )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def main(args: list = None) -> None:
    """
    Command-line interface for clone_research_minimal.

    Parameters
    ----------
    args : list, optional
        Command-line arguments. If None, uses sys.argv[1:]
    """
    argv = sys.argv[1:] if args is None else args

    # Guard clause: no project directory given -> print usage, exit 1.
    if not argv:
        usage = "\n".join(
            [
                "Usage: python -m scitex clone_research_minimal <project-dir>",
                "",
                "Arguments:",
                " project-dir Path to project directory (will be created)",
                " Can be a simple name like 'my_project' or a full path",
                "",
                "Example:",
                " python -m scitex clone_research_minimal my_research_project",
            ]
        )
        print(usage)
        sys.exit(1)

    # Exit status mirrors the clone result: 0 on success, 1 on failure.
    sys.exit(0 if clone_research_minimal(argv[0]) else 1)
|
|
110
|
+
|
|
111
|
+
# EOF
|
scitex/verify/README.md
CHANGED
|
@@ -34,7 +34,6 @@ scitex/verify/
|
|
|
34
34
|
├── _visualize.py # Re-exports from _viz/
|
|
35
35
|
└── _viz/
|
|
36
36
|
├── _mermaid.py # Mermaid DAG generation
|
|
37
|
-
├── _plotly.py # Interactive Plotly DAGs
|
|
38
37
|
├── _json.py # JSON DAG export
|
|
39
38
|
├── _format.py # Terminal output formatting
|
|
40
39
|
├── _colors.py # Color constants
|
|
@@ -199,17 +198,6 @@ render_dag("dag.mmd", target_file="output.csv") # Mermaid code
|
|
|
199
198
|
render_dag("dag.json", target_file="output.csv") # Graph structure
|
|
200
199
|
```
|
|
201
200
|
|
|
202
|
-
### Interactive Plotly
|
|
203
|
-
|
|
204
|
-
```python
|
|
205
|
-
from scitex.verify import generate_plotly_dag, render_plotly_dag
|
|
206
|
-
|
|
207
|
-
fig = generate_plotly_dag(target_file="output.csv")
|
|
208
|
-
fig.show() # Opens browser
|
|
209
|
-
|
|
210
|
-
render_plotly_dag("dag_plotly.html", target_file="output.csv")
|
|
211
|
-
```
|
|
212
|
-
|
|
213
201
|
## Integration Hooks (`_integration.py`)
|
|
214
202
|
|
|
215
203
|
Automatically called by `@stx.session` and `stx.io`:
|
scitex/verify/__init__.py
CHANGED
|
@@ -109,10 +109,8 @@ from ._visualize import (
|
|
|
109
109
|
format_status,
|
|
110
110
|
generate_html_dag,
|
|
111
111
|
generate_mermaid_dag,
|
|
112
|
-
generate_plotly_dag,
|
|
113
112
|
print_verification_summary,
|
|
114
113
|
render_dag,
|
|
115
|
-
render_plotly_dag,
|
|
116
114
|
)
|
|
117
115
|
|
|
118
116
|
|
|
@@ -191,9 +189,7 @@ __all__ = [
|
|
|
191
189
|
"format_list",
|
|
192
190
|
"generate_mermaid_dag",
|
|
193
191
|
"generate_html_dag",
|
|
194
|
-
"generate_plotly_dag",
|
|
195
192
|
"render_dag",
|
|
196
|
-
"render_plotly_dag",
|
|
197
193
|
"print_verification_summary",
|
|
198
194
|
# Convenience functions
|
|
199
195
|
"list_runs",
|
scitex/verify/_visualize.py
CHANGED
|
@@ -22,10 +22,8 @@ from ._viz import (
|
|
|
22
22
|
format_status,
|
|
23
23
|
generate_html_dag,
|
|
24
24
|
generate_mermaid_dag,
|
|
25
|
-
generate_plotly_dag,
|
|
26
25
|
print_verification_summary,
|
|
27
26
|
render_dag,
|
|
28
|
-
render_plotly_dag,
|
|
29
27
|
)
|
|
30
28
|
|
|
31
29
|
__all__ = [
|
|
@@ -38,9 +36,7 @@ __all__ = [
|
|
|
38
36
|
"format_list",
|
|
39
37
|
"generate_mermaid_dag",
|
|
40
38
|
"generate_html_dag",
|
|
41
|
-
"generate_plotly_dag",
|
|
42
39
|
"render_dag",
|
|
43
|
-
"render_plotly_dag",
|
|
44
40
|
"print_verification_summary",
|
|
45
41
|
]
|
|
46
42
|
|
scitex/verify/_viz/__init__.py
CHANGED
|
@@ -7,7 +7,6 @@ Provides multiple visualization backends:
|
|
|
7
7
|
- Terminal: Colored text output with status icons
|
|
8
8
|
- Mermaid: Text-based diagrams for docs/GitHub
|
|
9
9
|
- HTML: Interactive web visualization
|
|
10
|
-
- Plotly: Interactive Python-based visualization (optional)
|
|
11
10
|
"""
|
|
12
11
|
|
|
13
12
|
from ._colors import Colors, VerificationLevel
|
|
@@ -21,21 +20,6 @@ from ._format import (
|
|
|
21
20
|
from ._mermaid import generate_html_dag, generate_mermaid_dag, render_dag
|
|
22
21
|
from ._utils import print_verification_summary
|
|
23
22
|
|
|
24
|
-
# Optional Plotly support
|
|
25
|
-
try:
|
|
26
|
-
from ._plotly import generate_plotly_dag, render_plotly_dag
|
|
27
|
-
|
|
28
|
-
_HAS_PLOTLY = True
|
|
29
|
-
except ImportError:
|
|
30
|
-
_HAS_PLOTLY = False
|
|
31
|
-
|
|
32
|
-
def generate_plotly_dag(*args, **kwargs):
|
|
33
|
-
raise ImportError("plotly required: pip install plotly")
|
|
34
|
-
|
|
35
|
-
def render_plotly_dag(*args, **kwargs):
|
|
36
|
-
raise ImportError("plotly required: pip install plotly")
|
|
37
|
-
|
|
38
|
-
|
|
39
23
|
__all__ = [
|
|
40
24
|
"Colors",
|
|
41
25
|
"VerificationLevel",
|
|
@@ -48,8 +32,6 @@ __all__ = [
|
|
|
48
32
|
"generate_html_dag",
|
|
49
33
|
"render_dag",
|
|
50
34
|
"print_verification_summary",
|
|
51
|
-
"generate_plotly_dag",
|
|
52
|
-
"render_plotly_dag",
|
|
53
35
|
]
|
|
54
36
|
|
|
55
37
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scitex
|
|
3
|
-
Version: 2.17.0
|
|
3
|
+
Version: 2.17.3
|
|
4
4
|
Summary: A comprehensive Python library for scientific computing and data analysis
|
|
5
5
|
Project-URL: Homepage, https://github.com/ywatanabe1989/scitex-python
|
|
6
6
|
Project-URL: Documentation, https://scitex.readthedocs.io
|
|
@@ -442,6 +442,7 @@ Requires-Dist: matplotlib; extra == 'utils'
|
|
|
442
442
|
Requires-Dist: natsort; extra == 'utils'
|
|
443
443
|
Requires-Dist: tqdm; extra == 'utils'
|
|
444
444
|
Requires-Dist: xarray; extra == 'utils'
|
|
445
|
+
Provides-Extra: verify
|
|
445
446
|
Provides-Extra: web
|
|
446
447
|
Requires-Dist: aiohttp; extra == 'web'
|
|
447
448
|
Requires-Dist: anthropic; extra == 'web'
|