iflow-mcp_jin38324_oci-documentation-mcp-server 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/METADATA +108 -0
- iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/RECORD +9 -0
- iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/WHEEL +4 -0
- iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/entry_points.txt +2 -0
- iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/licenses/LICENSE +21 -0
- oci_documentation_mcp_server/__init__.py +11 -0
- oci_documentation_mcp_server/models.py +19 -0
- oci_documentation_mcp_server/server.py +299 -0
- oci_documentation_mcp_server/util.py +189 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: iflow-mcp_jin38324_oci-documentation-mcp-server
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A Model Context Protocol (MCP) server for OCI Documentation
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
8
|
+
Requires-Dist: googlesearch-python>=1.3.0
|
|
9
|
+
Requires-Dist: httpx>=0.27.0
|
|
10
|
+
Requires-Dist: loguru>=0.7.0
|
|
11
|
+
Requires-Dist: markdownify>=1.1.0
|
|
12
|
+
Requires-Dist: mcp[cli]>=1.6.0
|
|
13
|
+
Requires-Dist: pydantic>=2.10.6
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
*Inspired by: https://github.com/awslabs/mcp/tree/main/src/aws-documentation-mcp-server*
|
|
17
|
+
|
|
18
|
+
# OCI Documentation MCP Server
|
|
19
|
+
|
|
20
|
+
Model Context Protocol (MCP) server for OCI Documentation
|
|
21
|
+
|
|
22
|
+
This MCP server provides tools to search OCI documentation and to read OCI documentation pages.
|
|
23
|
+
|
|
24
|
+
## Features
|
|
25
|
+
|
|
26
|
+
- **Read Documentation**: Fetch and convert OCI documentation pages to markdown format
|
|
27
|
+
- **Search Documentation**: Search OCI documentation using a search engine
|
|
28
|
+
|
|
29
|
+
## Prerequisites
|
|
30
|
+
|
|
31
|
+
### Installation Requirements
|
|
32
|
+
|
|
33
|
+
1. Install `uv` from [Astral](https://docs.astral.sh/uv/getting-started/installation/) or the [GitHub README](https://github.com/astral-sh/uv#installation)
|
|
34
|
+
2. Install Python 3.10 or newer using `uv python install 3.10` (or a more recent version)
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
MCP config:
|
|
39
|
+
|
|
40
|
+
```json
|
|
41
|
+
{
|
|
42
|
+
"mcpServers": {
|
|
43
|
+
"oci-documentation-mcp-server": {
|
|
44
|
+
"command": "uvx",
|
|
45
|
+
"args": [
|
|
46
|
+
"--from",
|
|
47
|
+
"oci-documentation-mcp-server@latest",
|
|
48
|
+
"python",
|
|
49
|
+
"-m",
|
|
50
|
+
"oci_documentation_mcp_server.server"
|
|
51
|
+
],
|
|
52
|
+
"env": {
|
|
53
|
+
"FASTMCP_LOG_LEVEL": "ERROR"
|
|
54
|
+
},
|
|
55
|
+
"disabled": false,
|
|
56
|
+
"autoApprove": []
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
If the configuration above doesn't work, try this one instead:
|
|
65
|
+
|
|
66
|
+
```json
|
|
67
|
+
{
|
|
68
|
+
"mcpServers": {
|
|
69
|
+
"oci-documentation-mcp-server": {
|
|
70
|
+
"command": "uvx",
|
|
71
|
+
"args": ["oci-documentation-mcp-server@latest"],
|
|
72
|
+
"env": {
|
|
73
|
+
"FASTMCP_LOG_LEVEL": "ERROR"
|
|
74
|
+
},
|
|
75
|
+
"disabled": false,
|
|
76
|
+
"autoApprove": []
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Basic Usage
|
|
83
|
+
Example:
|
|
84
|
+
- In Cursor, ask: `Write a function to download files from OCI Object Storage.`
|
|
85
|
+
|
|
86
|
+

|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
## Tools
|
|
93
|
+
|
|
94
|
+
### read_documentation
|
|
95
|
+
|
|
96
|
+
Fetches an OCI documentation page and converts it to markdown format.
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
read_documentation(url: str, max_length: int = 5000, start_index: int = 0) -> str
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### search_documentation
|
|
103
|
+
|
|
104
|
+
Searches OCI documentation using the search engine.
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
search_documentation(search_phrase: str, limit: int) -> list[dict]
|
|
108
|
+
```
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
oci_documentation_mcp_server/__init__.py,sha256=DhuRdKxYqMKFeZ2QbkMoyI0C9PkEPYJAkwbdr8K2wzs,545
|
|
2
|
+
oci_documentation_mcp_server/models.py,sha256=JYtDGw_siq2BwQfwb4eUYHE6pEkCOwdA_5wB28ipmNk,753
|
|
3
|
+
oci_documentation_mcp_server/server.py,sha256=1VbcwSC-ILM_ZdMM6q6lrbmHjIyntaQoYjz-39M4GQY,10621
|
|
4
|
+
oci_documentation_mcp_server/util.py,sha256=MXmMaQgDFEVvSGkOdGD27EKUAAiz_h1c7LDd_vRf9Kc,6212
|
|
5
|
+
iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/METADATA,sha256=MuNPJALUFziBI8P5ZxjSboZbkgkfH5R3E6GDJDL_JfY,2456
|
|
6
|
+
iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
7
|
+
iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/entry_points.txt,sha256=zyPjtD80cZageHsk2JesRy2Nw1wlKPT8wXq9lwpJ_G4,90
|
|
8
|
+
iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/licenses/LICENSE,sha256=Ieeil7qXgjn_aRnd_Khc7914tFSgXsolwx-Z9hLlBP0,1060
|
|
9
|
+
iflow_mcp_jin38324_oci_documentation_mcp_server-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 jin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
|
|
2
|
+
# with the License. A copy of the License is located at
|
|
3
|
+
#
|
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
#
|
|
6
|
+
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
|
|
7
|
+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
|
|
8
|
+
# and limitations under the License.
|
|
9
|
+
"""oci-documentation-mcp-server"""
|
|
10
|
+
|
|
11
|
+
__version__ = '0.0.1'
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
|
|
2
|
+
# with the License. A copy of the License is located at
|
|
3
|
+
#
|
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
#
|
|
6
|
+
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
|
|
7
|
+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
|
|
8
|
+
# and limitations under the License.
|
|
9
|
+
"""Data models for OCI Documentation MCP Server."""
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SearchResult(BaseModel):
    """Search result from OCI documentation search.

    The server builds one instance per search hit. When a search fails it
    also returns a single instance with empty `title`/`url` and the error
    message in `description`.
    """

    # Title of the matching page.
    title: str
    # URL of the matching page.
    url: str
    # Optional snippet or summary for the hit; None when not provided.
    description: Optional[str] = None
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# This is an implementation of https://github.com/awslabs/mcp/tree/main/src/aws-documentation-mcp-server
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
|
|
4
|
+
# with the License. A copy of the License is located at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
|
|
9
|
+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
|
|
10
|
+
# and limitations under the License.
|
|
11
|
+
"""OCI Documentation MCP Server implementation."""
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import httpx
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import sys
|
|
18
|
+
from googlesearch import search
|
|
19
|
+
|
|
20
|
+
# Import models
|
|
21
|
+
from oci_documentation_mcp_server.models import (
|
|
22
|
+
SearchResult,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Import utility functions
|
|
26
|
+
from oci_documentation_mcp_server.util import (
|
|
27
|
+
extract_content_from_html,
|
|
28
|
+
format_documentation_result,
|
|
29
|
+
is_html_content
|
|
30
|
+
)
|
|
31
|
+
from loguru import logger
|
|
32
|
+
from mcp.server.fastmcp import Context, FastMCP
|
|
33
|
+
from pydantic import AnyUrl, Field
|
|
34
|
+
from typing import List, Union
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Set up logging
# Replace loguru's default handler with a stderr sink whose level is taken
# from FASTMCP_LOG_LEVEL (WARNING when the variable is unset).
logger.remove()
logger.add(sys.stderr, level=os.getenv('FASTMCP_LOG_LEVEL', 'WARNING'))

# Browser-like headers sent with every documentation page request.
# NOTE(review): the '*/' prefix in accept-language and the JSON Content-Type
# on a GET request look unintended -- confirm before changing them, since the
# remote server's behavior may depend on the exact values.
DEFAULT_HEADERS = {
    "user-agent":'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    "sec-ch-ua-mobile":'?0',
    "sec-ch-ua":'"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    "accept-language":'*/en-US,en;q=0.9',
    'Content-Type': 'application/json',
}
# SEARCH_API_URL = 'https://docs.oracle.com/apps/ohcsearchclient/api/v2/search/pages'
# SEARCH_PARAMS = {
# "q": None,
# "size": None,
# "pg": 1,
# "product": "en/cloud/oracle-cloud-infrastructure",
# "showfirstpage": "true",
# "lang": "en",
# "snippet": "true"
# }


# The instructions only mention tools that this module actually registers
# (`search_documentation` and `read_documentation`); pointing the client at a
# tool that does not exist would make it attempt calls that always fail.
mcp = FastMCP(
    'oci-documentation-mcp-server',
    instructions="""
    # OCI Documentation MCP Server

    This server provides tools to search public OCI documentation and to read documentation pages.

    ## Best Practices

    - For long documentation pages, make multiple calls to `read_documentation` with different `start_index` values for pagination
    - For very long documents (>30,000 characters), stop reading if you've found the needed information
    - When searching, use specific technical terms rather than general phrases
    - If a search yields insufficient results, rephrase it with alternative terms or service names
    - Always cite the documentation URL when providing information to users

    ## Tool Selection Guide

    - Use `search_documentation` when: You need to find documentation about a specific OCI service or feature
    - Use `read_documentation` when: You have a specific documentation URL and need its content
    """,
    dependencies=[
        'pydantic',
        'httpx',
        'beautifulsoup4',
        'googlesearch-python'
    ],
)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@mcp.tool()
async def search_documentation(
    ctx: Context,
    search_phrase: str = Field(description='Search phrase to use'),
    limit: int = Field(
        default=3,
        description='Maximum number of results to return',
        ge=1,
        le=10,
    ),
) -> List[SearchResult]:
    """Search OCI documentation with a web search restricted to docs.oracle.com.

    ## Usage

    This tool searches across all OCI documentation for pages matching your search phrase.
    Use it to find relevant documentation when you don't have a specific URL.

    ## Search Tips

    - Use specific technical terms rather than general phrases
    - Include service names to narrow results (e.g., "OCI Object Storage bucket versioning" instead of just "versioning")
    - Use quotes for exact phrase matching (e.g., "Using Instance Configurations and Instance Pools")
    - Include abbreviations and alternative terms to improve results

    ## Result Interpretation

    Each result includes:
    - title: The title of the documentation page
    - url: The documentation page URL
    - description: A brief excerpt or summary

    Args:
        ctx: MCP context for logging and error handling
        search_phrase: Search phrase to use
        limit: Maximum number of results to return

    Returns:
        List of search results with titles, URLs, and descriptions. If the
        search fails, a single result with empty title/url whose description
        carries the error message.
    """
    import asyncio

    logger.debug(f'Searching OCI documentation for: {search_phrase}')

    def _run_search() -> list:
        # `search` yields results lazily, so the HTTP requests (and any
        # errors they raise) only happen while iterating. Materialize the
        # results here so failures surface inside the try block below.
        return list(
            search(
                f"{search_phrase} site:docs.oracle.com",
                advanced=True,
                num_results=limit,
            )
        )

    try:
        # The search client is synchronous; run it in a worker thread so the
        # event loop is not blocked while waiting on the network.
        hits = await asyncio.to_thread(_run_search)
    except Exception as e:
        error_msg = f'Error searching OCI docs: {str(e)}'
        logger.error(error_msg)
        await ctx.error(error_msg)
        return [SearchResult(title='', url='', description=error_msg)]

    results = [
        SearchResult(title=hit.title, url=hit.url, description=hit.description)
        for hit in hits
    ]

    logger.debug(f'Found {len(results)} search results for: {search_phrase}')
    return results
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@mcp.tool()
async def read_documentation(
    ctx: Context,
    url: str = Field(description='URL of the OCI documentation page to read'),
    #url: Union[AnyUrl, str] = Field(description='URL of the OCI documentation page to read'),
    max_length: int = Field(
        default=5000,
        description='Maximum number of characters to return.',
        gt=0,
        lt=1000000,
    ),
    start_index: int = Field(
        default=0,
        description='On return output starting at this character index, useful if a previous fetch was truncated and more content is required.',
        ge=0,
    ),
) -> str:
    """Fetch and convert an OCI documentation page to markdown format.

    ## Usage

    This tool retrieves the content of an OCI documentation page and converts it to markdown format.
    For long documents, you can make multiple calls with different start_index values to retrieve
    the entire content in chunks.

    ## URL Requirements

    - Must be from the https://docs.oracle.com/ domain
    - The page path must end with .html or .htm (a query string or #fragment may follow)

    ## Example URLs

    - https://docs.oracle.com/en-us/iaas/Content/Object/Concepts/objectstorageoverview.htm
    - https://docs.oracle.com/en-us/iaas/Content/Compute/References/bestpracticescompute.htm

    ## Output Format

    The output is formatted as markdown text with:
    - Preserved headings and structure
    - Code blocks for examples
    - Lists and tables converted to markdown format

    ## Handling Long Documents

    If the response indicates the document was truncated, you have several options:

    1. **Continue Reading**: Make another call with start_index set to the end of the previous response
    2. **Stop Early**: For very long documents (>30,000 characters), if you've already found the specific information needed, you can stop reading

    Args:
        ctx: MCP context for logging and error handling
        url: URL of the OCI documentation page to read
        max_length: Maximum number of characters to return
        start_index: On return output starting at this character index

    Returns:
        Markdown content of the OCI documentation, or an error message string
        when the page could not be fetched.

    Raises:
        ValueError: If the URL is not on docs.oracle.com or its path does not
            end with .htm or .html.
    """
    from urllib.parse import urlsplit

    # Validate that URL is from docs.oracle.com and points at an .htm/.html page
    url_str = str(url)
    if not re.match(r'^https?://docs\.oracle\.com/', url_str):
        await ctx.error(f'Invalid URL: {url_str}. URL must be from the docs.oracle.com domain')
        raise ValueError('URL must be from the docs.oracle.com domain')
    # Check the extension on the path only, so links that carry a query
    # string or an in-page anchor (page.htm#section) are still accepted.
    if not urlsplit(url_str).path.endswith(('.htm', '.html')):
        await ctx.error(f'Invalid URL: {url_str}. URL must end with .htm or .html')
        raise ValueError('URL must end with .htm or .html')

    logger.debug(f'Fetching documentation from {url_str}')

    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(
                url_str,
                follow_redirects=True,
                headers=DEFAULT_HEADERS,
                timeout=30,
            )
        except httpx.HTTPError as e:
            error_msg = f'Failed to fetch {url_str}: {str(e)}'
            logger.error(error_msg)
            await ctx.error(error_msg)
            return error_msg

        if response.status_code >= 400:
            error_msg = f'Failed to fetch {url_str} - status code {response.status_code}'
            logger.error(error_msg)
            await ctx.error(error_msg)
            return error_msg

        # Decode the body as UTF-8 regardless of what the response declares.
        response.encoding = 'utf-8'
        page_raw = response.text
        content_type = response.headers.get('content-type', '')

    if is_html_content(page_raw, content_type):
        content = extract_content_from_html(page_raw)
    else:
        content = page_raw

    result = format_documentation_result(url_str, content, start_index, max_length)

    # Log if content was truncated
    if len(content) > start_index + max_length:
        logger.debug(
            f'Content truncated at {start_index + max_length} of {len(content)} characters'
        )

    return result
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def main():
    """Parse command-line options and start the MCP server.

    With ``--sse`` the server runs over the SSE transport on ``--port``
    (default 8888); otherwise it runs over the default stdio transport.
    """
    cli = argparse.ArgumentParser(
        description='An OCI Labs Model Context Protocol (MCP) server for OCI Documentation'
    )
    cli.add_argument('--sse', action='store_true', help='Use SSE transport')
    cli.add_argument('--port', type=int, default=8888, help='Port to run the server on')
    options = cli.parse_args()

    # Log startup information
    logger.info('Starting OCI Documentation MCP Server')

    # Default case first: plain stdio transport, the port option is unused.
    if not options.sse:
        logger.info('Using standard stdio transport')
        mcp.run()
        return

    # SSE transport: apply the requested port before starting the server.
    logger.info(f'Using SSE transport on port {options.port}')
    mcp.settings.port = options.port
    mcp.run(transport='sse')
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
if __name__ == '__main__':
|
|
299
|
+
main()
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
|
|
2
|
+
# with the License. A copy of the License is located at
|
|
3
|
+
#
|
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
#
|
|
6
|
+
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
|
|
7
|
+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
|
|
8
|
+
# and limitations under the License.
|
|
9
|
+
"""Utility functions for OCI Documentation MCP Server."""
|
|
10
|
+
|
|
11
|
+
import markdownify
|
|
12
|
+
from typing import Any, Dict, List
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def extract_content_from_html(html_string: str) -> str:
    """Extract the main content of an HTML page and convert it to Markdown.

    Args:
        html_string: Raw HTML content to process

    Returns:
        Simplified markdown version of the content, or an ``<e>...</e>``
        marker string when the input is empty, the conversion produced no
        output, or the conversion raised an exception.
    """
    if not html_string:
        return '<e>Empty HTML content</e>'

    try:
        # First use BeautifulSoup to clean up the HTML
        from bs4 import BeautifulSoup

        # Parse the decoded text as-is. Unescaping entities before parsing
        # would turn escaped code samples (e.g. &lt;tag&gt;) into real markup,
        # and re-encoding to bytes would let the parser re-guess the encoding
        # from the page's <meta> tags; the parser resolves entities itself.
        soup = BeautifulSoup(html_string, 'html.parser')

        # Common content container selectors, tried in order; first match wins
        content_selectors = [
            'main',
            'article',
            '#main-content',
            '.main-content',
            '#content',
            '.content',
            "div[role='main']",
            '#awsdocs-content',
            '.awsui-article',
        ]

        main_content = None
        for selector in content_selectors:
            candidate = soup.select_one(selector)
            if candidate is not None:
                main_content = candidate
                break

        # If no main content found, use the body (or the whole document)
        if main_content is None:
            main_content = soup.body if soup.body else soup

        # Remove navigation elements that might be in the main content
        nav_selectors = [
            'noscript',
            '.prev-next',
            '#main-col-footer',
            '.awsdocs-page-utilities',
            '#quick-feedback-yes',
            '#quick-feedback-no',
            '.page-loading-indicator',
            '#tools-panel',
            '.doc-cookie-banner',
            'awsdocs-copyright',
            'awsdocs-thumb-feedback',
        ]

        for selector in nav_selectors:
            for element in main_content.select(selector):
                element.decompose()

        # Tags passed to markdownify's `strip` option.
        # NOTE(review): `strip` may only drop the tag markup while keeping the
        # inner text -- confirm against the markdownify documentation.
        tags_to_strip = [
            'script',
            'style',
            'noscript',
            'meta',
            'link',
            'footer',
            'nav',
            'aside',
            'header',
            # AWS documentation specific elements
            'awsdocs-cookie-consent-container',
            'awsdocs-feedback-container',
            'awsdocs-page-header',
            'awsdocs-page-header-container',
            'awsdocs-filter-selector',
            'awsdocs-breadcrumb-container',
            'awsdocs-page-footer',
            'awsdocs-page-footer-container',
            'awsdocs-footer',
            'awsdocs-cookie-banner',
            # Common unnecessary elements
            'js-show-more-buttons',
            'js-show-more-text',
            'feedback-container',
            'feedback-section',
            'doc-feedback-container',
            'doc-feedback-section',
            'warning-container',
            'warning-section',
            'cookie-banner',
            'cookie-notice',
            'copyright-section',
            'legal-section',
            'terms-section',
        ]

        # Use markdownify on the cleaned HTML content
        content = markdownify.markdownify(
            str(main_content),
            heading_style=markdownify.ATX,
            autolinks=True,
            default_title=True,
            escape_asterisks=True,
            escape_underscores=True,
            newline_style='SPACES',
            strip=tags_to_strip,
        )

        if not content:
            return '<e>Page failed to be simplified from HTML</e>'

        return content
    except Exception as e:
        # Best-effort conversion: report the failure in-band instead of raising.
        return f'<e>Error converting HTML to Markdown: {str(e)}</e>'
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def is_html_content(page_raw: str, content_type: str) -> bool:
    """Determine if content is HTML.

    The content is treated as HTML when no Content-Type is available, when
    the Content-Type mentions ``text/html``, or when an ``<html`` tag appears
    within the first 100 characters of the page. Both checks ignore case,
    because media types and HTML tag names are case-insensitive.

    Args:
        page_raw: Raw page content
        content_type: Content-Type header (may be empty or None)

    Returns:
        True if content is HTML, False otherwise
    """
    # No declared type at all: assume HTML.
    if not content_type:
        return True
    if 'text/html' in content_type.lower():
        return True
    # Fall back to sniffing the start of the document.
    return '<html' in page_raw[:100].lower()
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def format_documentation_result(url: str, content: str, start_index: int, max_length: int) -> str:
    """Format documentation result with pagination information.

    Args:
        url: Documentation URL
        content: Content to format
        start_index: Start index for pagination; negative values are treated as 0
        max_length: Maximum content length

    Returns:
        A header line naming the URL followed by the requested slice of the
        content. When more content remains after the slice, a hint with the
        next ``start_index`` is appended; when the slice is empty, a
        "no more content" marker is returned instead.
    """
    header = f'OCI Documentation from {url}:\n\n'
    no_more = f'{header}<e>No more content available.</e>'
    original_length = len(content)

    # A negative index would slice from the end of the content and corrupt
    # the remaining/next-start arithmetic below, so clamp it to the start.
    start_index = max(start_index, 0)

    if start_index >= original_length:
        return no_more

    # Calculate the end index, ensuring we don't go beyond the content length
    end_index = min(start_index + max_length, original_length)
    truncated_content = content[start_index:end_index]

    if not truncated_content:
        return no_more

    result = f'{header}{truncated_content}'

    # Only add the prompt to continue fetching if there is still remaining content
    next_start = start_index + len(truncated_content)
    if next_start < original_length:
        result += f'\n\n<e>Content truncated. Call the read_documentation tool with start_index={next_start} to get more content.</e>'

    return result
|
|
189
|
+
|