napistu 0.1.0__py3-none-any.whl → 0.2.4.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. napistu/__init__.py +1 -1
  2. napistu/consensus.py +1010 -513
  3. napistu/constants.py +24 -0
  4. napistu/gcs/constants.py +2 -2
  5. napistu/gcs/downloads.py +57 -25
  6. napistu/gcs/utils.py +21 -0
  7. napistu/identifiers.py +105 -6
  8. napistu/ingestion/constants.py +0 -1
  9. napistu/ingestion/obo.py +24 -8
  10. napistu/ingestion/psi_mi.py +20 -5
  11. napistu/ingestion/reactome.py +8 -32
  12. napistu/mcp/__init__.py +69 -0
  13. napistu/mcp/__main__.py +180 -0
  14. napistu/mcp/codebase.py +182 -0
  15. napistu/mcp/codebase_utils.py +298 -0
  16. napistu/mcp/constants.py +72 -0
  17. napistu/mcp/documentation.py +166 -0
  18. napistu/mcp/documentation_utils.py +235 -0
  19. napistu/mcp/execution.py +382 -0
  20. napistu/mcp/profiles.py +73 -0
  21. napistu/mcp/server.py +86 -0
  22. napistu/mcp/tutorials.py +124 -0
  23. napistu/mcp/tutorials_utils.py +230 -0
  24. napistu/mcp/utils.py +47 -0
  25. napistu/mechanism_matching.py +782 -26
  26. napistu/modify/constants.py +41 -0
  27. napistu/modify/curation.py +4 -1
  28. napistu/modify/gaps.py +243 -156
  29. napistu/modify/pathwayannot.py +26 -8
  30. napistu/network/neighborhoods.py +16 -7
  31. napistu/network/net_create.py +209 -54
  32. napistu/network/net_propagation.py +118 -0
  33. napistu/network/net_utils.py +1 -32
  34. napistu/rpy2/netcontextr.py +10 -7
  35. napistu/rpy2/rids.py +7 -5
  36. napistu/sbml_dfs_core.py +46 -29
  37. napistu/sbml_dfs_utils.py +37 -1
  38. napistu/source.py +8 -2
  39. napistu/utils.py +67 -8
  40. napistu-0.2.4.dev3.dist-info/METADATA +84 -0
  41. napistu-0.2.4.dev3.dist-info/RECORD +95 -0
  42. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/WHEEL +1 -1
  43. tests/conftest.py +11 -5
  44. tests/test_consensus.py +4 -1
  45. tests/test_gaps.py +127 -0
  46. tests/test_gcs.py +3 -2
  47. tests/test_igraph.py +14 -0
  48. tests/test_mcp_documentation_utils.py +13 -0
  49. tests/test_mechanism_matching.py +658 -0
  50. tests/test_net_propagation.py +89 -0
  51. tests/test_net_utils.py +83 -0
  52. tests/test_sbml.py +2 -0
  53. tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
  54. tests/test_utils.py +81 -0
  55. napistu-0.1.0.dist-info/METADATA +0 -56
  56. napistu-0.1.0.dist-info/RECORD +0 -77
  57. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/entry_points.txt +0 -0
  58. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/licenses/LICENSE +0 -0
  59. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,180 @@
1
+ """
2
+ MCP (Model Context Protocol) Server for Napistu.
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ import click
8
+ import click_logging
9
+
10
+ import napistu
11
+ from napistu.mcp.profiles import get_profile, ServerProfile
12
+ from napistu.mcp.server import create_server
13
+
14
+ logger = logging.getLogger(napistu.__name__)
15
+ click_logging.basic_config(logger)
16
+
17
+
18
@click.group()
def cli():
    """The Napistu MCP (Model Context Protocol) Server CLI"""
    # Root command group. It has no behaviour of its own; the `server` and
    # `component` groups are attached to it at the bottom of this module.
22
+
23
+
24
@click.group()
def server():
    """Start and manage MCP servers."""
    # Pure container group: its subcommands are declared below with
    # @server.command(...).
28
+
29
+
30
@server.command(name="start")
@click.option(
    "--profile",
    type=click.Choice(["local", "remote", "full"]),
    default="remote",
    help="Predefined configuration profile",
)
@click.option("--server-name", type=str, help="Name of the MCP server")
@click_logging.simple_verbosity_option(logger)
def start_server(profile, server_name):
    """Start an MCP server with the specified profile."""
    # A server name is forwarded as an override only when one was supplied;
    # otherwise get_profile is called with no overrides at all.
    overrides = {"server_name": server_name} if server_name else {}
    server_profile = get_profile(profile, **overrides)

    logger.info(f"Starting Napistu MCP Server with {profile} profile...")
    # Named mcp_server so the module-level `server` click group is not shadowed.
    mcp_server = create_server(server_profile)
    asyncio.run(mcp_server.start())
53
+
54
+
55
@server.command(name="local")
@click.option(
    "--server-name", type=str, default="napistu-local", help="Name of the MCP server"
)
@click_logging.simple_verbosity_option(logger)
def start_local(server_name):
    """Start a local MCP server optimized for function execution."""
    # The "local" profile is always used; only the server name can be overridden.
    local_profile = get_profile("local", server_name=server_name)

    logger.info("Starting Napistu local MCP Server...")
    # Named mcp_server so the module-level `server` click group is not shadowed.
    mcp_server = create_server(local_profile)
    asyncio.run(mcp_server.start())
69
+
70
+
71
@server.command(name="remote")
@click.option(
    "--server-name", type=str, default="napistu-docs", help="Name of the MCP server"
)
@click.option("--codebase-path", type=str, help="Path to the Napistu codebase")
@click.option(
    "--docs-paths",
    type=str,
    help="Comma-separated list of paths to documentation files",
)
@click.option("--tutorials-path", type=str, help="Path to the tutorials directory")
@click_logging.simple_verbosity_option(logger)
def start_remote(server_name, tutorials_path, codebase_path=None, docs_paths=None):
    """Start a remote MCP server for documentation and codebase exploration."""
    # Click passes every declared option to the callback as a keyword
    # argument, so --codebase-path and --docs-paths must appear in the
    # signature; without them every invocation fails with a TypeError.
    config = {"server_name": server_name}
    if tutorials_path:
        config["tutorials_path"] = tutorials_path
    # NOTE(review): assumes get_profile accepts these overrides the same way
    # it accepts tutorials_path -- confirm against napistu.mcp.profiles.
    if codebase_path:
        config["codebase_path"] = codebase_path
    if docs_paths:
        # The option is documented as comma-separated, so forward a clean list.
        config["docs_paths"] = [
            path.strip() for path in docs_paths.split(",") if path.strip()
        ]

    # Get profile with overrides
    server_profile = get_profile("remote", **config)

    # Create and start the server. The local is named mcp_server so the
    # module-level `server` click group is not shadowed.
    logger.info("Starting Napistu remote MCP Server...")
    mcp_server = create_server(server_profile)
    asyncio.run(mcp_server.start())
97
+
98
+
99
@click.group()
def component():
    """Enable or disable specific MCP server components."""
    # Pure container group: its subcommands are declared below with
    # @component.command(...).
103
+
104
+
105
@component.command(name="list")
def list_components():
    """List available MCP server components."""
    # One echo call per line, in the order the components are presented.
    report = (
        "Available MCP server components:",
        " - documentation: Documentation components",
        " - codebase: Codebase exploration components",
        " - execution: Function execution components",
        " - tutorials: Tutorial components",
    )
    for line in report:
        click.echo(line)
113
+
114
+
115
@component.command(name="custom")
@click.option(
    "--enable-documentation/--disable-documentation",
    default=None,
    help="Enable/disable documentation components",
)
@click.option(
    "--enable-codebase/--disable-codebase",
    default=None,
    help="Enable/disable codebase exploration components",
)
@click.option(
    "--enable-execution/--disable-execution",
    default=None,
    help="Enable/disable function execution components",
)
@click.option(
    "--enable-tutorials/--disable-tutorials",
    default=None,
    help="Enable/disable tutorial components",
)
@click.option(
    "--server-name", type=str, default="napistu-custom", help="Name of the MCP server"
)
@click.option("--codebase-path", type=str, help="Path to the Napistu codebase")
@click.option(
    "--docs-paths",
    type=str,
    help="Comma-separated list of paths to documentation files",
)
@click.option("--tutorials-path", type=str, help="Path to the tutorials directory")
@click_logging.simple_verbosity_option(logger)
def custom_server(
    enable_documentation,
    enable_codebase,
    enable_execution,
    enable_tutorials,
    server_name,
    codebase_path=None,
    docs_paths=None,
    tutorials_path=None,
):
    """Start an MCP server with custom component configuration."""
    # Click passes every declared option to the callback as a keyword
    # argument, so the three path options must appear in the signature;
    # without them every invocation fails with a TypeError.
    config = {"server_name": server_name}

    # The flags default to None so that "not given" can be told apart from an
    # explicit --disable-*; only explicit choices are forwarded.
    toggles = {
        "enable_documentation": enable_documentation,
        "enable_codebase": enable_codebase,
        "enable_execution": enable_execution,
        "enable_tutorials": enable_tutorials,
    }
    config.update(
        {key: value for key, value in toggles.items() if value is not None}
    )

    # NOTE(review): assumes ServerProfile accepts these path settings as
    # keyword arguments -- confirm against napistu.mcp.profiles.
    if codebase_path:
        config["codebase_path"] = codebase_path
    if docs_paths:
        # The option is documented as comma-separated, so forward a clean list.
        config["docs_paths"] = [
            path.strip() for path in docs_paths.split(",") if path.strip()
        ]
    if tutorials_path:
        config["tutorials_path"] = tutorials_path

    # Create a custom profile
    server_profile = ServerProfile(**config)

    # Create and start the server. The local is named mcp_server so the
    # module-level `server` click group is not shadowed.
    logger.info("Starting Napistu custom MCP Server...")
    mcp_server = create_server(server_profile)
    asyncio.run(mcp_server.start())
173
+
174
+
175
# Attach both command groups to the root CLI, in declaration order.
for _group in (server, component):
    cli.add_command(_group)

# Allow running the CLI with `python -m napistu.mcp`.
if __name__ == "__main__":
    cli()
@@ -0,0 +1,182 @@
1
+ """
2
+ Codebase exploration components for the Napistu MCP server.
3
+ """
4
+
5
+ from napistu.mcp.constants import NAPISTU_PY_READTHEDOCS_API
6
+
7
+ from fastmcp import FastMCP
8
+
9
+ from typing import Dict, Any
10
+ import json
11
+
12
+ from napistu.mcp import codebase_utils
13
+ from napistu.mcp import utils as mcp_utils
14
+
15
# Module-level cache filled by initialize_components() and read by the
# handlers created in register_components(); one independent dict per kind
# of code element.
_codebase_cache = {section: {} for section in ("modules", "classes", "functions")}
21
+
22
+
23
async def initialize_components() -> bool:
    """
    Fill the module-level codebase cache.

    Module documentation is read from NAPISTU_PY_READTHEDOCS_API, then the
    functions and classes are derived from those modules.

    Returns
    -------
    bool
        True once all three cache sections have been assigned.
    """
    # The cache dict is only mutated, never rebound, so no `global` is needed.
    modules = await codebase_utils.read_read_the_docs(NAPISTU_PY_READTHEDOCS_API)
    _codebase_cache["modules"] = modules

    functions, classes = codebase_utils.extract_functions_and_classes_from_modules(
        modules
    )
    _codebase_cache["functions"] = functions
    _codebase_cache["classes"] = classes
    return True
44
+
45
+
46
def register_components(mcp: FastMCP):
    """
    Register codebase exploration components with the MCP server.

    Two resources (codebase summary, per-module details) and three tools
    (search, function documentation, class documentation) are registered.
    All of them read from the module-level ``_codebase_cache``.

    Args:
        mcp: FastMCP server instance
    """

    def _search_section(
        section: str, query: str, needle: str, with_signature: bool = False
    ) -> list:
        """Return the entries of one cache section whose JSON dump contains needle."""
        hits = []
        for name, info in _codebase_cache[section].items():
            # Use docstring or description for snippet
            doc = info.get("doc") or info.get("description") or ""
            # Matching is done on the whole serialized entry, not just on doc.
            if needle not in json.dumps(info).lower():
                continue
            hit = {"name": name, "description": doc}
            if with_signature:
                hit["signature"] = info.get("signature", "")
            hit["snippet"] = mcp_utils.get_snippet(doc, query)
            hits.append(hit)
        return hits

    # Register resources
    @mcp.resource("napistu://codebase/summary")
    async def get_codebase_summary() -> Dict[str, Any]:
        """
        Get a summary of the Napistu codebase structure.
        """
        # NOTE(review): extract_functions_and_classes_from_modules builds keys
        # as "<module>.<name>", so every key contains a "." and both filters
        # below look like they can never match -- confirm the intended meaning
        # of "top level" before changing them.
        return {
            "modules": list(_codebase_cache["modules"].keys()),
            "top_level_classes": [
                class_name
                for class_name in _codebase_cache["classes"]
                if "." not in class_name  # Only include top-level classes
            ],
            "top_level_functions": [
                func_name
                for func_name in _codebase_cache["functions"]
                if "." not in func_name  # Only include top-level functions
            ],
        }

    @mcp.resource("napistu://codebase/modules/{module_name}")
    async def get_module_details(module_name: str) -> Dict[str, Any]:
        """
        Get detailed information about a specific module.

        Args:
            module_name: Name of the module
        """
        if module_name not in _codebase_cache["modules"]:
            return {"error": f"Module {module_name} not found"}

        return _codebase_cache["modules"][module_name]

    # Register tools
    @mcp.tool()
    async def search_codebase(query: str) -> Dict[str, Any]:
        """
        Search the codebase for a specific query.

        Args:
            query: Search term

        Returns:
            Dictionary with search results organized by code element type, including snippets for context.
        """
        # Lower-case the query once instead of once per cache entry.
        needle = query.lower()
        return {
            "modules": _search_section("modules", query, needle),
            "classes": _search_section("classes", query, needle),
            "functions": _search_section(
                "functions", query, needle, with_signature=True
            ),
        }

    @mcp.tool()
    async def get_function_documentation(function_name: str) -> Dict[str, Any]:
        """
        Get detailed documentation for a specific function.

        Args:
            function_name: Name of the function

        Returns:
            Dictionary with function documentation
        """
        if function_name not in _codebase_cache["functions"]:
            return {"error": f"Function {function_name} not found"}

        return _codebase_cache["functions"][function_name]

    @mcp.tool()
    async def get_class_documentation(class_name: str) -> Dict[str, Any]:
        """
        Get detailed documentation for a specific class.

        Args:
            class_name: Name of the class

        Returns:
            Dictionary with class documentation
        """
        if class_name not in _codebase_cache["classes"]:
            return {"error": f"Class {class_name} not found"}

        return _codebase_cache["classes"][class_name]
@@ -0,0 +1,298 @@
1
+ """
2
+ Utilities for scanning and analyzing the Napistu codebase.
3
+ """
4
+
5
+ from typing import Dict, Optional, Any, Set
6
+
7
+ from napistu.mcp import utils as mcp_utils
8
+ from napistu.mcp.constants import READTHEDOCS_TOC_CSS_SELECTOR
9
+
10
+ # Import optional dependencies with error handling
11
+ try:
12
+ from bs4 import BeautifulSoup
13
+ except ImportError:
14
+ raise ImportError(
15
+ "Documentation utilities require additional dependencies. Install with 'pip install napistu[mcp]'"
16
+ )
17
+
18
+
19
async def read_read_the_docs(package_toc_url: str) -> dict:
    """
    Parse every module reachable from a ReadTheDocs package table of contents.

    Args:
        package_toc_url (str): URL of the package TOC page.

    Returns:
        dict: Parsed module pages as accumulated by
            _parse_rtd_module_recursive.
    """
    toc_links = await _process_rtd_package_toc(package_toc_url)
    docs_dict = {}
    visited = set()

    # Relative links are resolved against the TOC URL minus its last segment.
    toc_base = package_toc_url.rsplit("/", 1)[0]

    # Only the link targets matter here; the TOC names are not used.
    for module_url in toc_links.values():
        if not module_url.startswith("http"):
            module_url = f"{toc_base}/{module_url.lstrip('/')}"
        # visited and docs_dict are shared across calls so that each page is
        # fetched once and all results land in the same dict.
        await _parse_rtd_module_recursive(module_url, visited, docs_dict)

    return docs_dict
37
+
38
+
39
def extract_functions_and_classes_from_modules(modules: dict) -> tuple[dict, dict]:
    """
    Flatten the modules cache into functions and classes keyed by qualified name.

    Each function or class is keyed as '<module>.<name>' (e.g. 'module.func',
    'module.Class'). Submodules listed by a module are followed when they are
    themselves present in ``modules``.

    Args:
        modules (dict): The modules cache as returned by read_read_the_docs.

    Returns:
        tuple: (functions_dict, classes_dict)
    """
    functions = {}
    classes = {}
    # A submodule is reachable both from its parent and from the top-level
    # loop below; remembering what was handled avoids doing the work twice.
    processed = set()

    def _process_module(module_name: str, module_info: dict):
        if module_name in processed:
            return
        processed.add(module_name)

        for func_name, func_info in module_info.get("functions", {}).items():
            functions[f"{module_name}.{func_name}"] = func_info
        for class_name, class_info in module_info.get("classes", {}).items():
            classes[f"{module_name}.{class_name}"] = class_info
        # Submodules (if present in the cache)
        for submod_name in module_info.get("submodules", {}):
            fq_submod_name = f"{module_name}.{submod_name}"
            if fq_submod_name in modules:
                _process_module(fq_submod_name, modules[fq_submod_name])

    for module_name, module_info in modules.items():
        _process_module(module_name, module_info)

    return functions, classes
73
+
74
+
75
def _parse_rtd_module_page(html: str, url: Optional[str] = None) -> dict:
    """
    Turn one ReadTheDocs module page into a plain dictionary.

    Args:
        html (str): The HTML content of the module page.
        url (Optional[str]): The URL of the page; stored in the result as-is.

    Returns:
        dict: {
            'module': text of the first <h1>, or None when there is none,
            'url': the ``url`` argument,
            'functions': entries from <dl class="py function">, keyed by name,
            'classes': entries from <dl class="py class">, keyed by name,
            'submodules': result of _format_submodules
        }
    """
    soup = BeautifulSoup(html, "html.parser")

    module_name = None
    heading = soup.find("h1")
    if heading:
        # U+F0C1 is stripped from the heading text; presumably it is the
        # permalink icon glyph the docs theme appends -- TODO confirm.
        module_name = heading.get_text(strip=True).replace("\uf0c1", "").strip()

    # Entries whose name comes back empty are dropped.
    parsed_functions = (
        _format_function(entry.find("dt"), entry.find("dd"))
        for entry in soup.find_all("dl", class_="py function")
    )
    parsed_classes = (
        _format_class(entry) for entry in soup.find_all("dl", class_="py class")
    )

    return {
        "module": module_name,
        "url": url,
        "functions": {fn["name"]: fn for fn in parsed_functions if fn["name"]},
        "classes": {kls["name"]: kls for kls in parsed_classes if kls["name"]},
        "submodules": _format_submodules(soup),
    }
117
+
118
+
119
async def _process_rtd_package_toc(
    url: str, css_selector: str = READTHEDOCS_TOC_CSS_SELECTOR
) -> dict:
    """
    Download the package TOC page and map each listed module name to its link.

    Args:
        url (str): URL of the TOC page.
        css_selector (str): CSS selector picking the elements that hold the
            module links.

    Returns:
        dict: {name: url} as produced by _parse_module_tags.
    """
    toc_soup = BeautifulSoup(await mcp_utils.load_html_page(url), "html.parser")
    return _parse_module_tags(toc_soup.select(css_selector))
129
+
130
+
131
def _parse_module_tags(td_list: list, base_url: str = "") -> dict:
    """
    Build a {name: url} mapping from elements that contain module links.

    An element contributes an entry only when it holds an
    <a class="reference internal"> that in turn holds a <span class="pre">;
    the span text (stripped) is the name. An href that does not start with
    "http" is joined onto ``base_url`` with a single "/". With the default
    empty ``base_url`` such an href therefore comes back with a leading "/".

    Args:
        td_list (list): Elements to scan.
        base_url (str): Prefix for relative hrefs.

    Returns:
        dict: {name: href}
    """
    links = {}
    for cell in td_list:
        anchor = cell.find("a", class_="reference internal")
        if not anchor:
            continue
        label = anchor.find("span", class_="pre")
        if not label:
            continue

        module_name = label.text.strip()
        href = anchor.get("href")
        if href and not href.startswith("http"):
            href = f"{base_url.rstrip('/')}/{href.lstrip('/')}"
        links[module_name] = href
    return links
150
+
151
+
152
def _format_function(sig_dt, doc_dd) -> Dict[str, Any]:
    """
    Format a function or method signature and its documentation into a dictionary.

    Args:
        sig_dt: The <dt> tag containing the function/method signature, or None.
        doc_dd: The <dd> tag containing the function/method docstring, or None.

    Returns:
        dict: A dictionary with keys 'name', 'signature', 'id', and 'doc'.
            'name' is taken from the <span class="sig-name"> inside ``sig_dt``.
            Fields whose source tag is missing are derived from None.
    """
    name = None
    signature = None
    element_id = None
    if sig_dt:
        # A <dt> without a sig-name span used to raise AttributeError on None;
        # treat it like a missing name instead so callers can skip the entry.
        name_span = sig_dt.find("span", class_="sig-name")
        if name_span is not None:
            name = name_span.get_text(strip=True)
        signature = sig_dt.get_text(strip=True)
        element_id = sig_dt.get("id")

    return {
        # NOTE(review): _clean_signature_text already receives None whenever
        # sig_dt is missing, so it is assumed to accept None -- confirm.
        "name": mcp_utils._clean_signature_text(name),
        "signature": mcp_utils._clean_signature_text(signature),
        "id": element_id,
        "doc": doc_dd.get_text(" ", strip=True) if doc_dd else None,
    }
173
+
174
+
175
def _format_attribute(attr_dl) -> Dict[str, Any]:
    """
    Describe a class attribute as a dictionary.

    Args:
        attr_dl: The <dl> tag for the attribute, containing <dt> and <dd>.

    Returns:
        dict: A dictionary with keys 'name', 'signature', 'id', and 'doc'.
    """
    # An attribute entry has the same <dt>/<dd> layout as a function entry,
    # so the formatting is shared with _format_function.
    return _format_function(attr_dl.find("dt"), attr_dl.find("dd"))
195
+
196
+
197
def _format_class(class_dl) -> Dict[str, Any]:
    """
    Format a class definition, including its methods and attributes, into a dictionary.

    Args:
        class_dl: The <dl> tag for the class, containing <dt> and <dd>.

    Returns:
        dict: A dictionary with keys 'name', 'signature', 'id', 'doc', 'methods', and 'attributes'.
            'methods' and 'attributes' are themselves dicts keyed by name;
            entries whose name comes back empty are left out.
    """
    sig = class_dl.find("dt")
    doc = class_dl.find("dd")

    class_name = None
    if sig:
        # A <dt> without a sig-name span used to raise AttributeError on None;
        # treat it like a missing name instead.
        name_span = sig.find("span", class_="sig-name")
        if name_span is not None:
            class_name = name_span.get_text(strip=True)

    methods = {}
    attributes = {}
    if doc:
        # Methods and attributes are looked up inside the class's <dd>.
        for meth_dl in doc.find_all("dl", class_="py method"):
            meth = _format_function(meth_dl.find("dt"), meth_dl.find("dd"))
            if meth["name"]:
                methods[meth["name"]] = meth
        for attr_dl in doc.find_all("dl", class_="py attribute"):
            attr = _format_attribute(attr_dl)
            if attr["name"]:
                attributes[attr["name"]] = attr

    return {
        # NOTE(review): _clean_signature_text already receives None whenever
        # the <dt> is missing, so it is assumed to accept None -- confirm.
        "name": mcp_utils._clean_signature_text(class_name),
        "signature": mcp_utils._clean_signature_text(
            sig.get_text(strip=True) if sig else None
        ),
        "id": sig.get("id") if sig else None,
        "doc": doc.get_text(" ", strip=True) if doc else None,
        "methods": methods,
        "attributes": attributes,
    }
234
+
235
+
236
def _format_submodules(soup) -> dict:
    """
    Collect the submodules listed on a ReadTheDocs module page.

    Every <p class="rubric"> whose text is "modules" (case-insensitive) is
    inspected; if its next sibling is a table or a list, each link with an
    href inside it becomes one entry.

    Args:
        soup (BeautifulSoup): Parsed HTML soup of the module page.

    Returns:
        dict: {submodule_name: {"url": str, "description": str}}
    """
    submodules = {}
    for rubric in soup.find_all("p", class_="rubric"):
        if rubric.get_text(strip=True).lower() != "modules":
            continue
        listing = rubric.find_next_sibling()
        if not (listing and listing.name in ("table", "ul")):
            continue

        for link in listing.find_all("a", href=True):
            description = ""
            cell = link.find_parent("td")
            if cell and cell.find_next_sibling("td"):
                # Table layout: the description is in the cell after the link's cell.
                description = cell.find_next_sibling("td").get_text(strip=True)
            elif link.parent.name == "li":
                # List layout: the description is a <p> following the link.
                paragraph = link.find_next_sibling("p")
                if paragraph:
                    description = paragraph.get_text(strip=True)
            submodules[link.get_text(strip=True)] = {
                "url": link["href"],
                "description": description,
            }
    return submodules
265
+
266
+
267
async def _parse_rtd_module_recursive(
    module_url: str,
    visited: Optional[Set[str]] = None,
    docs_dict: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Parse one module page, then every submodule page it links to.

    Args:
        module_url (str): URL of the module page to parse.
        visited (Optional[Set[str]]): URLs already handled; updated in place.
            A fresh set is used when omitted.
        docs_dict (Optional[Dict[str, Any]]): Accumulator for parsed pages;
            updated in place. A fresh dict is used when omitted.

    Returns:
        Dict[str, Any]: ``docs_dict``, keyed by the page's module name, or by
            its URL when the page yields no module name.
    """
    visited = set() if visited is None else visited
    docs_dict = {} if docs_dict is None else docs_dict

    # Each URL is fetched at most once per shared `visited` set.
    if module_url in visited:
        return docs_dict
    visited.add(module_url)

    module_doc = _parse_rtd_module_page(
        await mcp_utils.load_html_page(module_url), module_url
    )
    docs_dict[module_doc.get("module") or module_url] = module_doc

    # Relative submodule links are resolved against this page's URL minus its
    # last path segment.
    page_base = module_url.rsplit("/", 1)[0]
    for submod_info in module_doc.get("submodules", {}).values():
        submod_url = submod_info["url"]
        if not submod_url.startswith("http"):
            submod_url = f"{page_base}/{submod_url.lstrip('/')}"
        await _parse_rtd_module_recursive(submod_url, visited, docs_dict)

    return docs_dict