napistu 0.1.0__py3-none-any.whl → 0.2.4.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +1 -1
- napistu/consensus.py +1010 -513
- napistu/constants.py +24 -0
- napistu/gcs/constants.py +2 -2
- napistu/gcs/downloads.py +57 -25
- napistu/gcs/utils.py +21 -0
- napistu/identifiers.py +105 -6
- napistu/ingestion/constants.py +0 -1
- napistu/ingestion/obo.py +24 -8
- napistu/ingestion/psi_mi.py +20 -5
- napistu/ingestion/reactome.py +8 -32
- napistu/mcp/__init__.py +69 -0
- napistu/mcp/__main__.py +180 -0
- napistu/mcp/codebase.py +182 -0
- napistu/mcp/codebase_utils.py +298 -0
- napistu/mcp/constants.py +72 -0
- napistu/mcp/documentation.py +166 -0
- napistu/mcp/documentation_utils.py +235 -0
- napistu/mcp/execution.py +382 -0
- napistu/mcp/profiles.py +73 -0
- napistu/mcp/server.py +86 -0
- napistu/mcp/tutorials.py +124 -0
- napistu/mcp/tutorials_utils.py +230 -0
- napistu/mcp/utils.py +47 -0
- napistu/mechanism_matching.py +782 -26
- napistu/modify/constants.py +41 -0
- napistu/modify/curation.py +4 -1
- napistu/modify/gaps.py +243 -156
- napistu/modify/pathwayannot.py +26 -8
- napistu/network/neighborhoods.py +16 -7
- napistu/network/net_create.py +209 -54
- napistu/network/net_propagation.py +118 -0
- napistu/network/net_utils.py +1 -32
- napistu/rpy2/netcontextr.py +10 -7
- napistu/rpy2/rids.py +7 -5
- napistu/sbml_dfs_core.py +46 -29
- napistu/sbml_dfs_utils.py +37 -1
- napistu/source.py +8 -2
- napistu/utils.py +67 -8
- napistu-0.2.4.dev3.dist-info/METADATA +84 -0
- napistu-0.2.4.dev3.dist-info/RECORD +95 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/WHEEL +1 -1
- tests/conftest.py +11 -5
- tests/test_consensus.py +4 -1
- tests/test_gaps.py +127 -0
- tests/test_gcs.py +3 -2
- tests/test_igraph.py +14 -0
- tests/test_mcp_documentation_utils.py +13 -0
- tests/test_mechanism_matching.py +658 -0
- tests/test_net_propagation.py +89 -0
- tests/test_net_utils.py +83 -0
- tests/test_sbml.py +2 -0
- tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
- tests/test_utils.py +81 -0
- napistu-0.1.0.dist-info/METADATA +0 -56
- napistu-0.1.0.dist-info/RECORD +0 -77
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/entry_points.txt +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,230 @@
|
|
1
|
+
"""
|
2
|
+
Utilities for loading and processing tutorials.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import os
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Dict
|
8
|
+
import httpx
|
9
|
+
import logging
|
10
|
+
|
11
|
+
from napistu.gcs.utils import _initialize_data_dir
|
12
|
+
|
13
|
+
from napistu.mcp.constants import TUTORIAL_URLS
|
14
|
+
from napistu.mcp.constants import TUTORIALS_CACHE_DIR
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
# Import optional dependencies with error handling
|
19
|
+
try:
|
20
|
+
import nbformat
|
21
|
+
except ImportError:
|
22
|
+
raise ImportError(
|
23
|
+
"Tutorial utilities require additional dependencies. Install with 'pip install napistu[mcp]'"
|
24
|
+
)
|
25
|
+
|
26
|
+
# Configure logger for this module
|
27
|
+
logger = logging.getLogger(__name__)
|
28
|
+
|
29
|
+
|
30
|
+
async def get_tutorial_markdown(
    tutorial_id: str,
    tutorial_urls: Dict[str, str] = TUTORIAL_URLS,
    cache_dir: Path = TUTORIALS_CACHE_DIR,
) -> str:
    """
    Return a tutorial notebook rendered as markdown.

    The notebook file is located through ``_ensure_notebook_cached`` (which
    fetches it when no cached copy exists), read with ``nbformat`` as a
    version 4 notebook, and converted with ``notebook_to_markdown``.

    Parameters
    ----------
    tutorial_id : str
        The ID of the tutorial (key in tutorial_urls).
    tutorial_urls : dict, optional
        Mapping of tutorial IDs to GitHub raw URLs. Defaults to TUTORIAL_URLS.
    cache_dir : Path, optional
        Directory holding cached notebooks. Defaults to TUTORIALS_CACHE_DIR.

    Returns
    -------
    str
        Markdown content of the notebook as a string.

    Raises
    ------
    Exception
        Any error raised while caching, reading, or converting the notebook
        is logged at error level and then re-raised unchanged.

    Examples
    --------
    >>> markdown = await get_tutorial_markdown('my_tutorial')
    >>> print(markdown)
    """
    try:
        path = await _ensure_notebook_cached(
            tutorial_id,
            tutorial_urls,
            cache_dir,
        )
        logger.debug(f"Loading notebook for tutorial '{tutorial_id}' from '{path}'")
        with open(path, "r", encoding="utf-8") as handle:
            parsed_notebook = nbformat.read(handle, as_version=4)
        logger.debug(f"Parsing notebook for tutorial '{tutorial_id}' to markdown")
        rendered = notebook_to_markdown(parsed_notebook)
    except Exception as e:
        # Log for the server operator, but let the caller see the real error.
        logger.error(
            f"Error getting tutorial content for tutorial_id='{tutorial_id}': {e}"
        )
        raise
    return rendered
|
74
|
+
|
75
|
+
|
76
|
+
async def fetch_notebook_from_github(
    tutorial_id: str, url: str, cache_dir: Path = TUTORIALS_CACHE_DIR
) -> Path:
    """
    Download a notebook and store it in the local cache.

    ``_initialize_data_dir(cache_dir)`` is called before anything is written
    (presumably to make sure the directory exists -- confirm against
    ``napistu.gcs.utils``). The response body is written verbatim to the path
    given by ``_get_cached_notebook_path``; an existing file at that path is
    overwritten.

    Parameters
    ----------
    tutorial_id : str
        The ID of the tutorial; determines the cached file name.
    url : str
        The raw GitHub URL to the notebook file.
    cache_dir : Path, optional
        Directory to cache the notebook. Defaults to TUTORIALS_CACHE_DIR.

    Returns
    -------
    Path
        Path to the cached notebook file.

    Raises
    ------
    httpx.HTTPError
        If the request fails or ``raise_for_status()`` rejects the response.

    Examples
    --------
    >>> await fetch_notebook_from_github('my_tutorial', 'https://github.com/.../my_tutorial.ipynb')
    """
    _initialize_data_dir(cache_dir)
    cache_path = _get_cached_notebook_path(tutorial_id, cache_dir)

    async with httpx.AsyncClient() as session:
        reply = await session.get(url)
        # Refuse to cache error pages: fail before touching the file.
        reply.raise_for_status()
        payload = reply.content
        cache_path.write_bytes(payload)
        logger.info(
            f"Downloaded and cached notebook for tutorial '{tutorial_id}' at '{cache_path}'"
        )
        return cache_path
|
115
|
+
|
116
|
+
|
117
|
+
def notebook_to_markdown(notebook: "nbformat.NotebookNode") -> str:
    """
    Render a Jupyter notebook as a single markdown string.

    Markdown cells are copied through as-is. Code cells become a fenced
    ``python`` block; when the cell has outputs, an "Output:" line follows
    and each output that carries either a ``"text"`` entry or a
    ``"text/plain"`` entry under ``"data"`` is added as its own fenced block.
    Outputs with neither are skipped, as are cells of any other type. A
    horizontal-rule separator is emitted after every cell, and all pieces are
    joined with newlines.

    Parameters
    ----------
    notebook : nbformat.NotebookNode
        The loaded notebook object (as returned by nbformat.read).

    Returns
    -------
    str
        Markdown representation of the notebook, including code cells and outputs.

    Examples
    --------
    >>> import nbformat
    >>> with open('notebook.ipynb') as f:
    ...     nb = nbformat.read(f, as_version=4)
    >>> md = notebook_to_markdown(nb)
    >>> print(md)
    """
    fence = "```"
    pieces = []

    for cell in notebook.cells:
        kind = cell.cell_type

        if kind == "markdown":
            pieces.append(cell.source)
        elif kind == "code":
            pieces.extend(["```python", cell.source, fence])

            if cell.outputs:
                pieces.append("\nOutput:")
                for result in cell.outputs:
                    # Stream-style outputs win over rich "data" bundles.
                    if "text" in result:
                        rendered = result["text"]
                    elif "data" in result and "text/plain" in result["data"]:
                        rendered = result["data"]["text/plain"]
                    else:
                        continue
                    pieces.extend([fence, rendered, fence])

        # Separator after every cell, whatever its type.
        pieces.append("\n---\n")

    return "\n".join(pieces)
|
161
|
+
|
162
|
+
|
163
|
+
async def _ensure_notebook_cached(
    tutorial_id: str,
    tutorial_urls: Dict[str, str] = TUTORIAL_URLS,
    cache_dir: Path = TUTORIALS_CACHE_DIR,
) -> Path:
    """
    Ensure the notebook is cached locally, fetching from GitHub if needed.

    An existing cached file is returned as-is without consulting
    ``tutorial_urls``; the URL mapping is only used when a download is needed.

    Parameters
    ----------
    tutorial_id : str
        The ID of the tutorial.
    tutorial_urls : dict, optional
        Mapping of tutorial IDs to GitHub raw URLs. Defaults to TUTORIAL_URLS.
    cache_dir : Path, optional
        Directory to cache notebooks. Defaults to TUTORIALS_CACHE_DIR.

    Returns
    -------
    Path
        Path to the cached notebook file.

    Raises
    ------
    FileNotFoundError
        If no cached copy exists and the tutorial ID is missing from
        tutorial_urls or maps to an empty URL.
    httpx.HTTPError
        If the download fails.

    Examples
    --------
    >>> await _ensure_notebook_cached('my_tutorial')
    """
    cache_path = _get_cached_notebook_path(tutorial_id, cache_dir)
    if cache_path.is_file():
        return cache_path

    # Use .get() so an unknown ID reaches the documented FileNotFoundError
    # below instead of escaping as a bare KeyError from the lookup.
    url = tutorial_urls.get(tutorial_id)
    if not url:
        raise FileNotFoundError(
            f"No GitHub URL found for tutorial ID: {tutorial_id}"
        )

    await fetch_notebook_from_github(tutorial_id, url, cache_dir)
    return cache_path
|
205
|
+
|
206
|
+
|
207
|
+
def _get_cached_notebook_path(
    tutorial_id: str, cache_dir: Path = TUTORIALS_CACHE_DIR
) -> Path:
    """
    Build the cache location for a tutorial notebook.

    This only computes the path: ``<cache_dir>/<tutorial_id>.ipynb``. It does
    not check whether the file or the directory exists.

    Parameters
    ----------
    tutorial_id : str
        The ID of the tutorial; used as the file stem.
    cache_dir : Path, optional
        Directory to cache notebooks. Defaults to TUTORIALS_CACHE_DIR.

    Returns
    -------
    Path
        Path to the cached notebook file.

    Examples
    --------
    >>> _get_cached_notebook_path('my_tutorial')
    PosixPath('.../my_tutorial.ipynb')
    """
    filename = f"{tutorial_id}.ipynb"
    return Path(cache_dir).joinpath(filename)
|
napistu/mcp/utils.py
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
import httpx
|
2
|
+
|
3
|
+
|
4
|
+
def get_snippet(text: str, query: str, context: int = 100) -> str:
    """
    Get a text snippet around the first case-insensitive match of a search term.

    Matching is done on lowercased copies of both strings, but the snippet is
    always cut from the original text, so its casing is preserved.

    Args:
        text: Text to search in
        query: Search term
        context: Number of characters to include before and after the match
    Returns:
        The matched text with up to ``context`` characters on each side, with
        "..." added on each side where the snippet stops short of the text
        boundary. An empty string if the query does not occur in the text.
    """
    from bisect import bisect_left, bisect_right
    from itertools import accumulate

    needle = query.lower()
    haystack = text.lower()

    # A single scan gives both the membership answer and the position.
    match_start = haystack.find(needle)
    if match_start == -1:
        return ""
    match_end = match_start + len(needle)

    if len(haystack) != len(text):
        # str.lower() can lengthen a string (U+0130 lowercases to two code
        # points), so offsets in the lowered copy do not line up with the
        # original. boundaries[i] is where text[i] starts in the lowered
        # copy; use it to translate the match bounds back.
        boundaries = [0, *accumulate(len(char.lower()) for char in text)]
        match_start = bisect_right(boundaries, match_start) - 1
        match_end = bisect_left(boundaries, match_end)

    start = max(0, match_start - context)
    end = min(len(text), match_end + context)
    snippet = text[start:end]

    # Add ellipsis if we're not at the beginning or end
    if start > 0:
        snippet = "..." + snippet
    if end < len(text):
        snippet = snippet + "..."
    return snippet
|
28
|
+
|
29
|
+
|
30
|
+
async def load_html_page(url: str) -> str:
    """
    Download a page and return its body as text.

    Args:
        url: Address of the page to request
    Returns:
        The response body as a string (the response's ``text``)
    Raises:
        httpx.HTTPError: if the request fails or ``raise_for_status()``
            rejects the response
    """
    async with httpx.AsyncClient() as session:
        page = await session.get(url)
        page.raise_for_status()
        html = page.text
    return html
|
39
|
+
|
40
|
+
|
41
|
+
def _clean_signature_text(text: str) -> str:
|
42
|
+
"""
|
43
|
+
Remove trailing Unicode headerlink icons and extra whitespace from text.
|
44
|
+
"""
|
45
|
+
if text:
|
46
|
+
return text.replace("\uf0c1", "").strip()
|
47
|
+
return text
|