napistu 0.1.0__py3-none-any.whl → 0.2.4.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +1 -1
- napistu/consensus.py +1010 -513
- napistu/constants.py +24 -0
- napistu/gcs/constants.py +2 -2
- napistu/gcs/downloads.py +57 -25
- napistu/gcs/utils.py +21 -0
- napistu/identifiers.py +105 -6
- napistu/ingestion/constants.py +0 -1
- napistu/ingestion/obo.py +24 -8
- napistu/ingestion/psi_mi.py +20 -5
- napistu/ingestion/reactome.py +8 -32
- napistu/mcp/__init__.py +69 -0
- napistu/mcp/__main__.py +180 -0
- napistu/mcp/codebase.py +182 -0
- napistu/mcp/codebase_utils.py +298 -0
- napistu/mcp/constants.py +72 -0
- napistu/mcp/documentation.py +166 -0
- napistu/mcp/documentation_utils.py +235 -0
- napistu/mcp/execution.py +382 -0
- napistu/mcp/profiles.py +73 -0
- napistu/mcp/server.py +86 -0
- napistu/mcp/tutorials.py +124 -0
- napistu/mcp/tutorials_utils.py +230 -0
- napistu/mcp/utils.py +47 -0
- napistu/mechanism_matching.py +782 -26
- napistu/modify/constants.py +41 -0
- napistu/modify/curation.py +4 -1
- napistu/modify/gaps.py +243 -156
- napistu/modify/pathwayannot.py +26 -8
- napistu/network/neighborhoods.py +16 -7
- napistu/network/net_create.py +209 -54
- napistu/network/net_propagation.py +118 -0
- napistu/network/net_utils.py +1 -32
- napistu/rpy2/netcontextr.py +10 -7
- napistu/rpy2/rids.py +7 -5
- napistu/sbml_dfs_core.py +46 -29
- napistu/sbml_dfs_utils.py +37 -1
- napistu/source.py +8 -2
- napistu/utils.py +67 -8
- napistu-0.2.4.dev3.dist-info/METADATA +84 -0
- napistu-0.2.4.dev3.dist-info/RECORD +95 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/WHEEL +1 -1
- tests/conftest.py +11 -5
- tests/test_consensus.py +4 -1
- tests/test_gaps.py +127 -0
- tests/test_gcs.py +3 -2
- tests/test_igraph.py +14 -0
- tests/test_mcp_documentation_utils.py +13 -0
- tests/test_mechanism_matching.py +658 -0
- tests/test_net_propagation.py +89 -0
- tests/test_net_utils.py +83 -0
- tests/test_sbml.py +2 -0
- tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
- tests/test_utils.py +81 -0
- napistu-0.1.0.dist-info/METADATA +0 -56
- napistu-0.1.0.dist-info/RECORD +0 -77
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/entry_points.txt +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/top_level.txt +0 -0
napistu/mcp/__main__.py
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
"""
|
2
|
+
MCP (Model Context Protocol) Server for Napistu.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import asyncio
|
6
|
+
import logging
|
7
|
+
import click
|
8
|
+
import click_logging
|
9
|
+
|
10
|
+
import napistu
|
11
|
+
from napistu.mcp.profiles import get_profile, ServerProfile
|
12
|
+
from napistu.mcp.server import create_server
|
13
|
+
|
14
|
+
logger = logging.getLogger(napistu.__name__)
|
15
|
+
click_logging.basic_config(logger)
|
16
|
+
|
17
|
+
|
18
|
+
# Root click group; the "server" and "component" sub-groups are attached
# to it at the bottom of this module.
@click.group()
def cli():
    """The Napistu MCP (Model Context Protocol) Server CLI"""
|
22
|
+
|
23
|
+
|
24
|
+
# Click group holding the "start", "local" and "remote" commands.
@click.group()
def server():
    """Start and manage MCP servers."""
|
28
|
+
|
29
|
+
|
30
|
+
@server.command(name="start")
@click.option(
    "--profile",
    type=click.Choice(["local", "remote", "full"]),
    default="remote",
    help="Predefined configuration profile",
)
@click.option("--server-name", type=str, help="Name of the MCP server")
@click_logging.simple_verbosity_option(logger)
def start_server(profile, server_name):
    """Start an MCP server with the specified profile."""
    # Only override the profile's server name when one was given on the CLI.
    overrides = {"server_name": server_name} if server_name else {}
    server_profile = get_profile(profile, **overrides)

    logger.info(f"Starting Napistu MCP Server with {profile} profile...")
    # Named ``mcp_server`` so the module-level ``server`` click group is not
    # shadowed inside this function.
    mcp_server = create_server(server_profile)
    asyncio.run(mcp_server.start())
|
53
|
+
|
54
|
+
|
55
|
+
@server.command(name="local")
@click.option(
    "--server-name", type=str, default="napistu-local", help="Name of the MCP server"
)
@click_logging.simple_verbosity_option(logger)
def start_local(server_name):
    """Start a local MCP server optimized for function execution."""
    # Resolve the "local" profile, overriding only the server name.
    local_profile = get_profile("local", server_name=server_name)

    logger.info("Starting Napistu local MCP Server...")
    # Build the server, then drive its start() coroutine to completion.
    asyncio.run(create_server(local_profile).start())
|
69
|
+
|
70
|
+
|
71
|
+
@server.command(name="remote")
@click.option(
    "--server-name", type=str, default="napistu-docs", help="Name of the MCP server"
)
@click.option("--codebase-path", type=str, help="Path to the Napistu codebase")
@click.option(
    "--docs-paths",
    type=str,
    help="Comma-separated list of paths to documentation files",
)
@click.option("--tutorials-path", type=str, help="Path to the tutorials directory")
@click_logging.simple_verbosity_option(logger)
def start_remote(server_name, tutorials_path, codebase_path=None, docs_paths=None):
    """Start a remote MCP server for documentation and codebase exploration."""
    # click passes every declared option to the callback as a keyword
    # argument, so each option above must appear in the signature. Previously
    # --codebase-path and --docs-paths were declared but not accepted, which
    # made the command fail with a TypeError on every invocation.

    # Collect configuration: only options that were actually supplied are
    # forwarded, so the profile's own defaults stay in effect otherwise.
    config = {"server_name": server_name}
    if tutorials_path:
        config["tutorials_path"] = tutorials_path
    # NOTE(review): confirm that get_profile / ServerProfile accept the
    # "codebase_path" and "docs_paths" keys and expect docs_paths as a list.
    if codebase_path:
        config["codebase_path"] = codebase_path
    if docs_paths:
        # The option is documented as comma-separated; drop empty entries.
        config["docs_paths"] = [
            path.strip() for path in docs_paths.split(",") if path.strip()
        ]

    # Get profile with overrides
    server_profile = get_profile("remote", **config)

    # Create and start the server
    logger.info("Starting Napistu remote MCP Server...")
    mcp_server = create_server(server_profile)
    asyncio.run(mcp_server.start())
|
97
|
+
|
98
|
+
|
99
|
+
# Click group holding the "list" and "custom" commands.
@click.group()
def component():
    """Enable or disable specific MCP server components."""
|
103
|
+
|
104
|
+
|
105
|
+
@component.command(name="list")
def list_components():
    """List available MCP server components."""
    # One echo per line, header first, in a fixed order.
    listing = (
        "Available MCP server components:",
        " - documentation: Documentation components",
        " - codebase: Codebase exploration components",
        " - execution: Function execution components",
        " - tutorials: Tutorial components",
    )
    for line in listing:
        click.echo(line)
|
113
|
+
|
114
|
+
|
115
|
+
@component.command(name="custom")
@click.option(
    "--enable-documentation/--disable-documentation",
    default=None,
    help="Enable/disable documentation components",
)
@click.option(
    "--enable-codebase/--disable-codebase",
    default=None,
    help="Enable/disable codebase exploration components",
)
@click.option(
    "--enable-execution/--disable-execution",
    default=None,
    help="Enable/disable function execution components",
)
@click.option(
    "--enable-tutorials/--disable-tutorials",
    default=None,
    help="Enable/disable tutorial components",
)
@click.option(
    "--server-name", type=str, default="napistu-custom", help="Name of the MCP server"
)
@click.option("--codebase-path", type=str, help="Path to the Napistu codebase")
@click.option(
    "--docs-paths",
    type=str,
    help="Comma-separated list of paths to documentation files",
)
@click.option("--tutorials-path", type=str, help="Path to the tutorials directory")
@click_logging.simple_verbosity_option(logger)
def custom_server(
    enable_documentation,
    enable_codebase,
    enable_execution,
    enable_tutorials,
    server_name,
    codebase_path=None,
    docs_paths=None,
    tutorials_path=None,
):
    """Start an MCP server with custom component configuration."""
    # click passes every declared option to the callback as a keyword
    # argument. --codebase-path, --docs-paths and --tutorials-path were
    # declared but missing from the signature, so the command failed with a
    # TypeError on every invocation.

    # Collect configuration. A toggle left as None means the flag was not
    # given, so it is omitted and ServerProfile keeps its own default.
    config = {"server_name": server_name}
    toggles = {
        "enable_documentation": enable_documentation,
        "enable_codebase": enable_codebase,
        "enable_execution": enable_execution,
        "enable_tutorials": enable_tutorials,
    }
    config.update(
        {key: value for key, value in toggles.items() if value is not None}
    )

    # NOTE(review): confirm that ServerProfile accepts the "tutorials_path",
    # "codebase_path" and "docs_paths" keys and expects docs_paths as a list.
    if tutorials_path:
        config["tutorials_path"] = tutorials_path
    if codebase_path:
        config["codebase_path"] = codebase_path
    if docs_paths:
        # The option is documented as comma-separated; drop empty entries.
        config["docs_paths"] = [
            path.strip() for path in docs_paths.split(",") if path.strip()
        ]

    # Create a custom profile
    server_profile = ServerProfile(**config)

    # Create and start the server
    logger.info("Starting Napistu custom MCP Server...")
    mcp_server = create_server(server_profile)
    asyncio.run(mcp_server.start())
|
173
|
+
|
174
|
+
|
175
|
+
# Add command groups to the CLI
|
176
|
+
cli.add_command(server)
|
177
|
+
cli.add_command(component)
|
178
|
+
|
179
|
+
if __name__ == "__main__":
|
180
|
+
cli()
|
napistu/mcp/codebase.py
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
"""
|
2
|
+
Codebase exploration components for the Napistu MCP server.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from napistu.mcp.constants import NAPISTU_PY_READTHEDOCS_API
|
6
|
+
|
7
|
+
from fastmcp import FastMCP
|
8
|
+
|
9
|
+
from typing import Dict, Any
|
10
|
+
import json
|
11
|
+
|
12
|
+
from napistu.mcp import codebase_utils
|
13
|
+
from napistu.mcp import utils as mcp_utils
|
14
|
+
|
15
|
+
# Global cache for codebase information
|
16
|
+
_codebase_cache = {
|
17
|
+
"modules": {},
|
18
|
+
"classes": {},
|
19
|
+
"functions": {},
|
20
|
+
}
|
21
|
+
|
22
|
+
|
23
|
+
async def initialize_components() -> bool:
    """
    Populate the module-level codebase cache.

    Reads the module documentation via ``codebase_utils.read_read_the_docs``
    and then derives the function and class indexes from it.

    Returns
    -------
    bool
        True if initialization is successful.
    """
    # The cache dict is mutated in place, so no ``global`` statement is needed.
    modules = await codebase_utils.read_read_the_docs(NAPISTU_PY_READTHEDOCS_API)
    _codebase_cache["modules"] = modules

    # Derive the flat function / class indexes from the parsed modules.
    functions, classes = codebase_utils.extract_functions_and_classes_from_modules(
        modules
    )
    _codebase_cache["functions"] = functions
    _codebase_cache["classes"] = classes
    return True
|
44
|
+
|
45
|
+
|
46
|
+
def register_components(mcp: FastMCP):
    """
    Register codebase exploration components with the MCP server.

    Registers two resources (codebase summary, per-module details) and three
    tools (codebase search, function documentation, class documentation).
    All of them read from the module-level ``_codebase_cache``.

    Args:
        mcp: FastMCP server instance
    """

    def _is_top_level(qualified_name: str) -> bool:
        # codebase_utils.extract_functions_and_classes_from_modules keys its
        # results as "<module>.<name>", so testing for the absence of "."
        # rejected every entry and the summary lists were always empty.
        # An entry is top-level when the part before the last "." is itself
        # a known module. Undotted names are still accepted as before.
        if "." not in qualified_name:
            return True
        owner, _, _ = qualified_name.rpartition(".")
        return owner in _codebase_cache["modules"]

    def _search_cache(kind: str, query: str, include_signature: bool = False) -> list:
        # Case-insensitive substring match against the JSON dump of each
        # cached entry; the snippet is taken from the entry's doc text.
        needle = query.lower()
        hits = []
        for name, info in _codebase_cache[kind].items():
            if needle not in json.dumps(info).lower():
                continue
            doc = info.get("doc") or info.get("description") or ""
            hit = {"name": name, "description": doc}
            if include_signature:
                hit["signature"] = info.get("signature", "")
            hit["snippet"] = mcp_utils.get_snippet(doc, query)
            hits.append(hit)
        return hits

    # Register resources
    @mcp.resource("napistu://codebase/summary")
    async def get_codebase_summary() -> Dict[str, Any]:
        """
        Get a summary of the Napistu codebase structure.
        """
        return {
            "modules": list(_codebase_cache["modules"].keys()),
            "top_level_classes": [
                class_name
                for class_name in _codebase_cache["classes"]
                if _is_top_level(class_name)
            ],
            "top_level_functions": [
                func_name
                for func_name in _codebase_cache["functions"]
                if _is_top_level(func_name)
            ],
        }

    @mcp.resource("napistu://codebase/modules/{module_name}")
    async def get_module_details(module_name: str) -> Dict[str, Any]:
        """
        Get detailed information about a specific module.

        Args:
            module_name: Name of the module
        """
        if module_name not in _codebase_cache["modules"]:
            return {"error": f"Module {module_name} not found"}

        return _codebase_cache["modules"][module_name]

    # Register tools
    @mcp.tool()
    async def search_codebase(query: str) -> Dict[str, Any]:
        """
        Search the codebase for a specific query.

        Args:
            query: Search term

        Returns:
            Dictionary with search results organized by code element type, including snippets for context.
        """
        return {
            "modules": _search_cache("modules", query),
            "classes": _search_cache("classes", query),
            "functions": _search_cache("functions", query, include_signature=True),
        }

    @mcp.tool()
    async def get_function_documentation(function_name: str) -> Dict[str, Any]:
        """
        Get detailed documentation for a specific function.

        Args:
            function_name: Name of the function

        Returns:
            Dictionary with function documentation
        """
        if function_name not in _codebase_cache["functions"]:
            return {"error": f"Function {function_name} not found"}

        return _codebase_cache["functions"][function_name]

    @mcp.tool()
    async def get_class_documentation(class_name: str) -> Dict[str, Any]:
        """
        Get detailed documentation for a specific class.

        Args:
            class_name: Name of the class

        Returns:
            Dictionary with class documentation
        """
        if class_name not in _codebase_cache["classes"]:
            return {"error": f"Class {class_name} not found"}

        return _codebase_cache["classes"][class_name]
|
@@ -0,0 +1,298 @@
|
|
1
|
+
"""
|
2
|
+
Utilities for scanning and analyzing the Napistu codebase.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Dict, Optional, Any, Set
|
6
|
+
|
7
|
+
from napistu.mcp import utils as mcp_utils
|
8
|
+
from napistu.mcp.constants import READTHEDOCS_TOC_CSS_SELECTOR
|
9
|
+
|
10
|
+
# Import optional dependencies with error handling
|
11
|
+
try:
    from bs4 import BeautifulSoup
except ImportError as err:
    # Chain explicitly so the traceback shows the original import failure
    # as the direct cause of this friendlier error.
    raise ImportError(
        "Documentation utilities require additional dependencies. Install with 'pip install napistu[mcp]'"
    ) from err
|
17
|
+
|
18
|
+
|
19
|
+
async def read_read_the_docs(package_toc_url: str) -> dict:
    """
    Parse every module linked from a ReadTheDocs package TOC page.

    Each linked module page is parsed together with its submodules; all
    results are accumulated in a single dict, which is returned.

    Args:
        package_toc_url (str): URL of the package table-of-contents page.

    Returns:
        dict: Parsed module documentation, as filled in by
            _parse_rtd_module_recursive.
    """
    # Step 1: collect the module links listed in the TOC
    toc_links = await _process_rtd_package_toc(package_toc_url)

    parsed_modules = {}
    seen_urls = set()

    # Step 2: walk each module page (and its submodules)
    for href in toc_links.values():
        if href.startswith("http"):
            absolute_url = href
        else:
            # Relative link: resolve against the directory of the TOC page.
            toc_dir = package_toc_url.rsplit("/", 1)[0]
            absolute_url = toc_dir + "/" + href.lstrip("/")
        await _parse_rtd_module_recursive(absolute_url, seen_urls, parsed_modules)

    return parsed_modules
|
37
|
+
|
38
|
+
|
39
|
+
def extract_functions_and_classes_from_modules(modules: dict) -> tuple[dict, dict]:
    """
    Flatten the modules cache into two lookup tables.

    Every function and class found under a module is stored under its
    qualified name ('module.func', 'module.Class'). Submodules listed by a
    module are followed when their qualified name is itself a key of
    ``modules``.

    Args:
        modules (dict): The modules cache as returned by read_read_the_docs.

    Returns:
        tuple: (functions_dict, classes_dict)
    """
    all_functions = {}
    all_classes = {}

    def _collect(prefix: str, info: dict):
        # Functions, then classes, then nested submodules.
        all_functions.update(
            {f"{prefix}.{name}": entry for name, entry in info.get("functions", {}).items()}
        )
        all_classes.update(
            {f"{prefix}.{name}": entry for name, entry in info.get("classes", {}).items()}
        )
        for child in info.get("submodules", {}):
            child_key = f"{prefix}.{child}"
            if child_key in modules:
                _collect(child_key, modules[child_key])

    for top_name, top_info in modules.items():
        _collect(top_name, top_info)

    return all_functions, all_classes
|
73
|
+
|
74
|
+
|
75
|
+
def _parse_rtd_module_page(html: str, url: Optional[str] = None) -> dict:
    """
    Extract the documented contents of one ReadTheDocs module page.

    Args:
        html (str): The HTML content of the module page.
        url (Optional[str]): The URL of the page (stored as-is in the result).

    Returns:
        dict: {
            'module': str or None (text of the first <h1>),
            'url': str,
            'functions': Dict[str, dict],
            'classes': Dict[str, dict],
            'submodules': Dict[str, dict]
        }
    """
    soup = BeautifulSoup(html, "html.parser")
    submodules = _format_submodules(soup)

    # Module name comes from the page's <h1>; "\uf0c1" is removed from it.
    heading = soup.find("h1")
    module_name = None
    if heading:
        module_name = heading.get_text(strip=True).replace("\uf0c1", "").strip()

    # Functions: one <dl class="py function"> per function
    functions = {}
    for function_dl in soup.find_all("dl", class_="py function"):
        parsed = _format_function(function_dl.find("dt"), function_dl.find("dd"))
        if parsed["name"]:
            functions[parsed["name"]] = parsed

    # Classes: one <dl class="py class"> per class
    classes = {}
    for class_dl in soup.find_all("dl", class_="py class"):
        parsed = _format_class(class_dl)
        if parsed["name"]:
            classes[parsed["name"]] = parsed

    return {
        "module": module_name,
        "url": url,
        "functions": functions,
        "classes": classes,
        "submodules": submodules,
    }
|
117
|
+
|
118
|
+
|
119
|
+
async def _process_rtd_package_toc(
    url: str, css_selector: str = READTHEDOCS_TOC_CSS_SELECTOR
) -> dict:
    """
    Load a ReadTheDocs package TOC page and map module names to their links.

    Args:
        url (str): URL of the TOC page.
        css_selector (str): CSS selector picking the elements that hold the
            module links.

    Returns:
        dict: {name: url}, as produced by _parse_module_tags.
    """
    toc_html = await mcp_utils.load_html_page(url)
    toc_cells = BeautifulSoup(toc_html, "html.parser").select(css_selector)
    return _parse_module_tags(toc_cells)
|
129
|
+
|
130
|
+
|
131
|
+
def _parse_module_tags(td_list: list, base_url: str = "") -> dict:
|
132
|
+
"""
|
133
|
+
Parse a list of <td> elements containing module links and return a dict of {name: url}.
|
134
|
+
Optionally prepends base_url to relative hrefs.
|
135
|
+
"""
|
136
|
+
result = {}
|
137
|
+
for td in td_list:
|
138
|
+
a = td.find("a", class_="reference internal")
|
139
|
+
if a:
|
140
|
+
# Get the module name from the <span class="pre"> tag
|
141
|
+
span = a.find("span", class_="pre")
|
142
|
+
if span:
|
143
|
+
name = span.text.strip()
|
144
|
+
href = a.get("href")
|
145
|
+
# Prepend base_url if href is relative
|
146
|
+
if href and not href.startswith("http"):
|
147
|
+
href = base_url.rstrip("/") + "/" + href.lstrip("/")
|
148
|
+
result[name] = href
|
149
|
+
return result
|
150
|
+
|
151
|
+
|
152
|
+
def _format_function(sig_dt, doc_dd) -> Dict[str, Any]:
    """
    Format a function or method signature and its documentation into a dictionary.

    Args:
        sig_dt: The <dt> tag containing the function/method signature, or None.
        doc_dd: The <dd> tag containing the function/method docstring, or None.

    Returns:
        dict: A dictionary with keys 'name', 'signature', 'id', and 'doc'.
            'name' is None when there is no <dt> or it has no
            <span class="sig-name">.
    """
    name = None
    signature = None
    element_id = None
    if sig_dt:
        # find() returns None when the <dt> has no sig-name span; guard it
        # so such a page yields a nameless entry instead of AttributeError.
        name_span = sig_dt.find("span", class_="sig-name")
        if name_span is not None:
            name = name_span.get_text(strip=True)
        signature = sig_dt.get_text(strip=True)
        element_id = sig_dt.get("id")

    # NOTE(review): _clean_signature_text is presumed to tolerate None, as
    # it was already called with None whenever sig_dt was missing.
    return {
        "name": mcp_utils._clean_signature_text(name),
        "signature": mcp_utils._clean_signature_text(signature),
        "id": element_id,
        "doc": doc_dd.get_text(" ", strip=True) if doc_dd else None,
    }
|
173
|
+
|
174
|
+
|
175
|
+
def _format_attribute(attr_dl) -> Dict[str, Any]:
    """
    Format a class attribute's signature and documentation into a dictionary.

    Args:
        attr_dl: The <dl> tag for the attribute, containing <dt> and <dd>.

    Returns:
        dict: A dictionary with keys 'name', 'signature', 'id', and 'doc'.
            'name' is None when there is no <dt> or it has no
            <span class="sig-name">.
    """
    sig = attr_dl.find("dt")
    doc = attr_dl.find("dd")

    name = None
    signature = None
    element_id = None
    if sig:
        # find() returns None when the <dt> has no sig-name span; guard it
        # so such a page yields a nameless entry instead of AttributeError.
        name_span = sig.find("span", class_="sig-name")
        if name_span is not None:
            name = name_span.get_text(strip=True)
        signature = sig.get_text(strip=True)
        element_id = sig.get("id")

    # NOTE(review): _clean_signature_text is presumed to tolerate None, as
    # it was already called with None whenever the <dt> was missing.
    return {
        "name": mcp_utils._clean_signature_text(name),
        "signature": mcp_utils._clean_signature_text(signature),
        "id": element_id,
        "doc": doc.get_text(" ", strip=True) if doc else None,
    }
|
195
|
+
|
196
|
+
|
197
|
+
def _format_class(class_dl) -> Dict[str, Any]:
    """
    Format a class definition, including its methods and attributes, into a dictionary.

    Args:
        class_dl: The <dl> tag for the class, containing <dt> and <dd>.

    Returns:
        dict: A dictionary with keys 'name', 'signature', 'id', 'doc', 'methods', and 'attributes'.
            'methods' and 'attributes' are themselves dicts keyed by name.
            'name' is None when there is no <dt> or it has no
            <span class="sig-name">.
    """
    sig = class_dl.find("dt")
    doc = class_dl.find("dd")

    class_name = None
    signature = None
    element_id = None
    if sig:
        # find() returns None when the <dt> has no sig-name span; guard it
        # so such a page yields a nameless entry instead of AttributeError.
        name_span = sig.find("span", class_="sig-name")
        if name_span is not None:
            class_name = name_span.get_text(strip=True)
        signature = sig.get_text(strip=True)
        element_id = sig.get("id")

    methods = {}
    attributes = {}
    if doc:
        # Methods and attributes are nested <dl> elements inside the <dd>.
        for meth_dl in doc.find_all("dl", class_="py method"):
            meth = _format_function(meth_dl.find("dt"), meth_dl.find("dd"))
            if meth["name"]:
                methods[meth["name"]] = meth
        for attr_dl in doc.find_all("dl", class_="py attribute"):
            attr = _format_attribute(attr_dl)
            if attr["name"]:
                attributes[attr["name"]] = attr

    # NOTE(review): _clean_signature_text is presumed to tolerate None, as
    # it was already called with None whenever the <dt> was missing.
    return {
        "name": mcp_utils._clean_signature_text(class_name),
        "signature": mcp_utils._clean_signature_text(signature),
        "id": element_id,
        "doc": doc.get_text(" ", strip=True) if doc else None,
        "methods": methods,
        "attributes": attributes,
    }
|
234
|
+
|
235
|
+
|
236
|
+
def _format_submodules(soup) -> dict:
    """
    Collect the submodules listed on a ReadTheDocs module page.

    For every <p class="rubric"> whose text is "modules" (case-insensitive),
    the next sibling is inspected; if it is a <table> or <ul>, each link
    with an href inside it becomes one submodule entry.

    Args:
        soup (BeautifulSoup): Parsed HTML soup of the module page.

    Returns:
        dict: {submodule_name: {"url": str, "description": str}}
    """
    found = {}
    for heading in soup.find_all("p", class_="rubric"):
        if heading.get_text(strip=True).lower() != "modules":
            continue
        listing = heading.find_next_sibling()
        if not (listing and listing.name in ("table", "ul")):
            continue
        for link in listing.find_all("a", href=True):
            description = ""
            cell = link.find_parent("td")
            # Table layout: the description is the cell after the link's cell.
            next_cell = cell.find_next_sibling("td") if cell else None
            if next_cell:
                description = next_cell.get_text(strip=True)
            elif link.parent.name == "li":
                # List layout: the description is a <p> following the link.
                paragraph = link.find_next_sibling("p")
                if paragraph:
                    description = paragraph.get_text(strip=True)
            found[link.get_text(strip=True)] = {
                "url": link["href"],
                "description": description,
            }
    return found
|
265
|
+
|
266
|
+
|
267
|
+
async def _parse_rtd_module_recursive(
    module_url: str,
    visited: Optional[Set[str]] = None,
    docs_dict: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Parse one module page, then descend into each submodule it lists.

    Args:
        module_url (str): URL of the module page to parse.
        visited (Optional[Set[str]]): URLs already parsed; updated in place.
            A URL found here is skipped.
        docs_dict (Optional[Dict[str, Any]]): Accumulator for parsed pages,
            keyed by module name (or by URL when the page has no name);
            updated in place.

    Returns:
        Dict[str, Any]: The accumulator, ``docs_dict``.
    """
    visited = set() if visited is None else visited
    docs_dict = {} if docs_dict is None else docs_dict

    # Each URL is parsed at most once.
    if module_url in visited:
        return docs_dict
    visited.add(module_url)

    html = await mcp_utils.load_html_page(module_url)
    parsed_page = _parse_rtd_module_page(html, module_url)
    docs_dict[parsed_page.get("module") or module_url] = parsed_page

    # Relative submodule links are resolved against this page's directory.
    page_dir = module_url.rsplit("/", 1)[0]
    for child in parsed_page.get("submodules", {}).values():
        child_url = child["url"]
        if not child_url.startswith("http"):
            child_url = page_dir + "/" + child_url.lstrip("/")
        await _parse_rtd_module_recursive(child_url, visited, docs_dict)

    return docs_dict
|