pse-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GEMINI.md +72 -0
- package/License.md +3 -0
- package/MCP Documents/README.md +1 -0
- package/MCP Documents/mcp-client-guide.txt +736 -0
- package/MCP Documents/mcp-complete-guide.txt +522 -0
- package/MCP Documents/mcp-enhanced-instructions.md +297 -0
- package/MCP Documents/mcp-server-guide.md +415 -0
- package/MCP Documents/mcp-windows.txt +161 -0
- package/QWEN.md +207 -0
- package/README.md +220 -0
- package/dist/content-fetcher.js +36 -0
- package/dist/google-search.js +421 -0
- package/dist/services/content-extractor.service.js +195 -0
- package/dist/services/google-search.service.js +244 -0
- package/dist/types.js +1 -0
- package/dist-package/README.md +210 -0
- package/dist-package/dist/content-fetcher.js +36 -0
- package/dist-package/dist/google-search.js +420 -0
- package/dist-package/dist/services/content-extractor.service.js +195 -0
- package/dist-package/dist/services/google-search.service.js +244 -0
- package/dist-package/dist/types.js +1 -0
- package/dist-package/package-lock.json +3104 -0
- package/dist-package/package.json +23 -0
- package/license +4 -0
- package/package.json +40 -0
- package/src/google-search.ts +477 -0
- package/src/mcp.d.ts +36 -0
- package/src/services/content-extractor.service.ts +232 -0
- package/src/services/google-search.service.ts +305 -0
- package/src/types.ts +64 -0
- package/tasks.md +141 -0
- package/tsconfig.json +16 -0
@@ -0,0 +1,161 @@
|
|
1
|
+
# Windows-Specific MCP Implementation Guide
|
2
|
+
|
3
|
+
## Common Issues with stdio on Windows
|
4
|
+
|
5
|
+
### 1. Line Ending Differences
|
6
|
+
- Windows uses CRLF (`\r\n`) for line endings
|
7
|
+
- Unix-based systems use LF (`\n`)
|
8
|
+
- This difference can cause serious problems in stdio communication with MCP servers
|
9
|
+
- JSON-RPC 2.0 (which MCP uses) expects consistent message framing
|
10
|
+
|
11
|
+
### 2. Text vs. Binary Mode
|
12
|
+
- Windows default text mode automatically translates line endings
|
13
|
+
- This automatic translation can corrupt protocol data
|
14
|
+
- Local MCP servers must not log messages to stdout
|
15
|
+
- All protocol communication must use binary mode
|
16
|
+
|
17
|
+
### 3. Console Buffering Issues
|
18
|
+
- Windows console applications have different buffering behavior
|
19
|
+
- Can cause timing issues or message truncation in stdio transport
|
20
|
+
- Requires proper buffer handling in both client and server
|
21
|
+
|
22
|
+
### 4. Environment Variable Inheritance
|
23
|
+
- MCP servers inherit only a subset of environment variables automatically
|
24
|
+
- Windows has different environment variable handling
|
25
|
+
- May require additional configuration for proper environment setup
|
26
|
+
|
27
|
+
## Best Practices for Windows Implementation
|
28
|
+
|
29
|
+
### 1. Use Binary Mode for stdio Streams
|
30
|
+
For Python implementations on Windows:
|
31
|
+
```python
|
32
|
+
import sys
|
33
|
+
import os
|
34
|
+
|
35
|
+
# Force binary mode for stdin/stdout
|
36
|
+
if sys.platform == 'win32':
|
37
|
+
import msvcrt
|
38
|
+
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
|
39
|
+
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
40
|
+
```
|
41
|
+
|
42
|
+
For TypeScript/Node.js implementations:
|
43
|
+
```typescript
|
44
|
+
if (process.platform === 'win32') {
|
45
|
+
process.stdin.setEncoding('binary');
|
46
|
+
process.stdout.setDefaultEncoding('binary');
|
47
|
+
}
|
48
|
+
```
|
49
|
+
|
50
|
+
### 2. Leverage Existing SDKs
|
51
|
+
- Use official SDKs when possible
|
52
|
+
- Python SDK includes proper Windows stdio handling
|
53
|
+
- TypeScript SDK handles cross-platform differences
|
54
|
+
- Avoid implementing custom stdio handling
|
55
|
+
|
56
|
+
### 3. Console Output Best Practices
|
57
|
+
- Use stderr for logging and debugging
|
58
|
+
- Never write to stdout in server implementations
|
59
|
+
- Log messages are automatically captured by host applications
|
60
|
+
- Keep protocol communication separate from logging
|
61
|
+
|
62
|
+
### 4. Path Handling
|
63
|
+
- Windows uses backslashes (`\`) as path separators
|
64
|
+
- Forward slashes (`/`) also work on Windows
|
65
|
+
- Escape backslashes in string literals
|
66
|
+
- Consider using path.join() for cross-platform compatibility
|
67
|
+
|
68
|
+
### 5. Testing Guidelines
|
69
|
+
- Test explicitly on Windows
|
70
|
+
- Don't assume Unix-working code will work on Windows
|
71
|
+
- Test with different Windows versions
|
72
|
+
- Verify stdio behavior in various scenarios
|
73
|
+
|
74
|
+
### 6. Error Handling
|
75
|
+
- Implement platform-specific error handling
|
76
|
+
- Handle Windows-specific exceptions
|
77
|
+
- Add retry logic for transient issues
|
78
|
+
- Provide detailed error messages
|
79
|
+
|
80
|
+
### 7. Transport Alternatives
|
81
|
+
- Consider HTTP+SSE for more consistent behavior
|
82
|
+
- Use WebSocket transport when available
|
83
|
+
- Implement fallback mechanisms
|
84
|
+
- Document transport limitations
|
85
|
+
|
86
|
+
## Implementation Examples
|
87
|
+
|
88
|
+
### Basic Python MCP Server
|
89
|
+
```python
|
90
|
+
from mcp import Server, StdioTransport
|
91
|
+
import sys
|
92
|
+
import os
|
93
|
+
|
94
|
+
# Windows-specific setup
|
95
|
+
if sys.platform == 'win32':
|
96
|
+
import msvcrt
|
97
|
+
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
|
98
|
+
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
99
|
+
|
100
|
+
# Create server with proper error handling
|
101
|
+
server = Server(
|
102
|
+
name="windows-example",
|
103
|
+
version="1.0.0",
|
104
|
+
transport=StdioTransport()
|
105
|
+
)
|
106
|
+
|
107
|
+
# Log to stderr, not stdout
|
108
|
+
def log_error(msg):
|
109
|
+
print(f"Error: {msg}", file=sys.stderr)
|
110
|
+
|
111
|
+
try:
|
112
|
+
server.start()
|
113
|
+
except Exception as e:
|
114
|
+
log_error(f"Server error: {e}")
|
115
|
+
sys.exit(1)
|
116
|
+
```
|
117
|
+
|
118
|
+
### Basic TypeScript MCP Server
|
119
|
+
```typescript
|
120
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
121
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
122
|
+
|
123
|
+
// Windows-specific setup
|
124
|
+
if (process.platform === 'win32') {
|
125
|
+
process.stdin.setEncoding('binary');
|
126
|
+
process.stdout.setDefaultEncoding('binary');
|
127
|
+
}
|
128
|
+
|
129
|
+
const server = new McpServer({
|
130
|
+
name: "windows-example",
|
131
|
+
version: "1.0.0"
|
132
|
+
});
|
133
|
+
|
134
|
+
// Error handling for Windows
|
135
|
+
process.on('uncaughtException', (error) => {
|
136
|
+
console.error(`Uncaught Exception: ${error.message}`);
|
137
|
+
process.exit(1);
|
138
|
+
});
|
139
|
+
|
140
|
+
const transport = new StdioServerTransport();
|
141
|
+
server.connect(transport).catch((error) => {
|
142
|
+
console.error(`Server Error: ${error.message}`);
|
143
|
+
process.exit(1);
|
144
|
+
});
|
145
|
+
```
|
146
|
+
|
147
|
+
## Best Practices Summary
|
148
|
+
|
149
|
+
1. **Always Use Binary Mode**: Prevent line ending translation issues
|
150
|
+
2. **Proper Error Handling**: Account for Windows-specific exceptions
|
151
|
+
3. **Path Management**: Handle Windows path separators correctly
|
152
|
+
4. **Testing**: Verify functionality specifically on Windows
|
153
|
+
5. **SDK Usage**: Leverage official SDKs for platform compatibility
|
154
|
+
6. **Logging**: Use stderr for debugging, keep stdout clean
|
155
|
+
7. **Transport Options**: Consider alternatives to stdio when needed
|
156
|
+
|
157
|
+
## Additional Resources
|
158
|
+
|
159
|
+
- [Official MCP Windows Documentation](https://modelcontextprotocol.io/docs/windows)
|
160
|
+
- [Windows-Specific Examples](https://github.com/modelcontextprotocol/examples/windows)
|
161
|
+
- [Troubleshooting Guide](https://modelcontextprotocol.io/docs/troubleshooting#windows)
|
package/QWEN.md
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
# Google Search MCP Server - Project Context
|
2
|
+
|
3
|
+
## Project Overview
|
4
|
+
|
5
|
+
The Google Search MCP Server is a Model Context Protocol (MCP) server that provides Google search capabilities and webpage content analysis tools. This server enables AI models to perform Google searches and analyze webpage content programmatically through a standardized MCP interface.
|
6
|
+
|
7
|
+
### Key Features
|
8
|
+
- Google Custom Search integration
|
9
|
+
- Advanced search features (filters, sorting, pagination, categorization)
|
10
|
+
- Webpage content analysis in multiple formats (markdown, HTML, plain text)
|
11
|
+
- Batch webpage analysis
|
12
|
+
- Result categorization and classification
|
13
|
+
- Content summarization
|
14
|
+
- Optimized, human-readable responses
|
15
|
+
- MCP-compliant interface
|
16
|
+
|
17
|
+
### Technology Stack
|
18
|
+
- **Language**: TypeScript
|
19
|
+
- **Runtime**: Node.js
|
20
|
+
- **Core Dependencies**:
|
21
|
+
- `@modelcontextprotocol/sdk`: MCP server SDK
|
22
|
+
- `googleapis`: Google API integration
|
23
|
+
- `@mozilla/readability`: Content extraction
|
24
|
+
- `cheerio`: HTML parsing
|
25
|
+
- `jsdom`: DOM manipulation
|
26
|
+
- `turndown`: HTML to Markdown conversion
|
27
|
+
- `axios`: HTTP client
|
28
|
+
- `express`: Web framework (if needed)
|
29
|
+
|
30
|
+
## Project Structure
|
31
|
+
|
32
|
+
```
|
33
|
+
D:\ai\pse-mcp\
|
34
|
+
├── dist/ # Compiled JavaScript files
|
35
|
+
├── dist-package/ # Distribution package
|
36
|
+
├── MCP Documents/ # MCP protocol documentation
|
37
|
+
├── src/ # Source TypeScript files
|
38
|
+
│ ├── services/ # Service implementations
|
39
|
+
│ │ ├── google-search.service.ts
|
40
|
+
│ │ └── content-extractor.service.ts
|
41
|
+
│ ├── google-search.ts # Main server entry point
|
42
|
+
│ ├── mcp.d.ts # MCP type definitions
|
43
|
+
│ └── types.ts # Type definitions
|
44
|
+
├── package.json # Project dependencies and scripts
|
45
|
+
├── tsconfig.json # TypeScript configuration
|
46
|
+
├── README.md # Project documentation
|
47
|
+
└── ...
|
48
|
+
```
|
49
|
+
|
50
|
+
## Building and Running
|
51
|
+
|
52
|
+
### Prerequisites
|
53
|
+
- Node.js (v16 or higher)
|
54
|
+
- Google Cloud Platform account
|
55
|
+
- Custom Search Engine ID
|
56
|
+
- Google API Key
|
57
|
+
|
58
|
+
### Setup Commands
|
59
|
+
1. Install dependencies:
|
60
|
+
```bash
|
61
|
+
npm install
|
62
|
+
```
|
63
|
+
|
64
|
+
2. Build the TypeScript code:
|
65
|
+
```bash
|
66
|
+
npm run build
|
67
|
+
```
|
68
|
+
|
69
|
+
3. Run the server:
|
70
|
+
```bash
|
71
|
+
npm run start
|
72
|
+
```
|
73
|
+
|
74
|
+
### Environment Variables
|
75
|
+
Required environment variables:
|
76
|
+
- `GOOGLE_API_KEY`: Your Google API key
|
77
|
+
- `GOOGLE_SEARCH_ENGINE_ID`: Your Custom Search Engine ID
|
78
|
+
|
79
|
+
### Development Scripts
|
80
|
+
- `npm run build`: Compiles TypeScript to JavaScript
|
81
|
+
- `npm run start`: Runs the compiled server
|
82
|
+
- `npm run dev`: Watches for changes and recompiles (tsc -w)
|
83
|
+
|
84
|
+
## Architecture and Components
|
85
|
+
|
86
|
+
### Main Components
|
87
|
+
1. **GoogleSearchService**: Handles Google API interactions for search functionality
|
88
|
+
2. **ContentExtractor**: Manages webpage content analysis and extraction
|
89
|
+
3. **Main Server (google-search.ts)**: MCP server implementation with tool handlers
|
90
|
+
|
91
|
+
### Services
|
92
|
+
|
93
|
+
#### GoogleSearchService
|
94
|
+
- Integrates with Google Custom Search API
|
95
|
+
- Provides caching mechanism (5-minute TTL, max 100 entries)
|
96
|
+
- Implements advanced search filtering and pagination
|
97
|
+
- Categorizes search results by content type
|
98
|
+
- Handles error management and response formatting
|
99
|
+
|
100
|
+
#### ContentExtractor
|
101
|
+
- Extracts webpage content using Mozilla Readability
|
102
|
+
- Converts content to markdown, HTML, or plain text formats
|
103
|
+
- Implements content caching (30-minute TTL, max 50 entries)
|
104
|
+
- Generates content summaries and statistics
|
105
|
+
- Handles batch processing of multiple webpages
|
106
|
+
|
107
|
+
### Available Tools
|
108
|
+
|
109
|
+
#### 1. google_search
|
110
|
+
Searches Google and returns relevant results with advanced filtering options:
|
111
|
+
- Query string (required)
|
112
|
+
- Number of results (default: 5, max: 10)
|
113
|
+
- Site filtering
|
114
|
+
- Language filtering (ISO 639-1 codes)
|
115
|
+
- Date restrictions
|
116
|
+
- Exact phrase matching
|
117
|
+
- Result type (news, images, videos)
|
118
|
+
- Pagination support
|
119
|
+
- Sorting (relevance or date)
|
120
|
+
|
121
|
+
#### 2. extract_webpage_content
|
122
|
+
Extracts and analyzes content from a single webpage:
|
123
|
+
- URL (required)
|
124
|
+
- Output format (markdown, html, text)
|
125
|
+
- Removes ads, navigation, and clutter
|
126
|
+
- Returns title, description, content stats, and summary
|
127
|
+
|
128
|
+
#### 3. extract_multiple_webpages
|
129
|
+
Extracts content from multiple webpages in a single request:
|
130
|
+
- Array of URLs (max 5 per request)
|
131
|
+
- Output format (markdown, html, text)
|
132
|
+
- Batch processing capability
|
133
|
+
|
134
|
+
## Configuration
|
135
|
+
|
136
|
+
The server configuration needs to be added to the MCP settings file (typically located at `%APPDATA%/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`):
|
137
|
+
|
138
|
+
```json
|
139
|
+
{
|
140
|
+
"mcpServers": {
|
141
|
+
"google-search": {
|
142
|
+
"autoApprove": [
|
143
|
+
"google_search",
|
144
|
+
"extract_webpage_content",
|
145
|
+
"extract_multiple_webpages"
|
146
|
+
],
|
147
|
+
"disabled": false,
|
148
|
+
"timeout": 60,
|
149
|
+
"command": "node",
|
150
|
+
"args": [
|
151
|
+
"/path/to/google-search-mcp-server/dist/google-search.js"
|
152
|
+
],
|
153
|
+
"env": {
|
154
|
+
"GOOGLE_API_KEY": "your-google-api-key",
|
155
|
+
"GOOGLE_SEARCH_ENGINE_ID": "your-custom-search-engine-id"
|
156
|
+
},
|
157
|
+
"transportType": "stdio"
|
158
|
+
}
|
159
|
+
}
|
160
|
+
}
|
161
|
+
```
|
162
|
+
|
163
|
+
## Development Conventions
|
164
|
+
|
165
|
+
### Coding Standards
|
166
|
+
- TypeScript with strict mode enabled
|
167
|
+
- Use of async/await for asynchronous operations
|
168
|
+
- Proper error handling with descriptive messages
|
169
|
+
- Input validation for all tool arguments
|
170
|
+
- Caching strategies for performance optimization
|
171
|
+
|
172
|
+
### Type Safety
|
173
|
+
- Comprehensive type definitions in `types.ts`
|
174
|
+
- Strict typing for all function parameters and return values
|
175
|
+
- MCP request/response schema validation
|
176
|
+
|
177
|
+
### Caching Strategy
|
178
|
+
- Search results: 5-minute TTL with max 100 entries
|
179
|
+
- Webpage content: 30-minute TTL with max 50 entries
|
180
|
+
- Cache keys generated from request parameters
|
181
|
+
- Automatic cleanup of oldest entries when limits exceeded
|
182
|
+
|
183
|
+
## MCP Protocol Integration
|
184
|
+
|
185
|
+
The server implements the Model Context Protocol with:
|
186
|
+
- Standardized tool listing and calling
|
187
|
+
- Input schema validation
|
188
|
+
- Error response formatting
|
189
|
+
- Stdio transport for communication with MCP clients
|
190
|
+
|
191
|
+
## Testing and Verification
|
192
|
+
|
193
|
+
To verify the server is working:
|
194
|
+
1. Build with `npm run build`
|
195
|
+
2. Start with `npm run start`
|
196
|
+
3. Use MCP client to call the available tools
|
197
|
+
4. Verify search results and content extraction work as expected
|
198
|
+
|
199
|
+
## Deployment
|
200
|
+
|
201
|
+
For distribution, the project includes a process to create a compiled distribution package:
|
202
|
+
1. Build the TypeScript code
|
203
|
+
2. Create a distribution package with only necessary files
|
204
|
+
3. Include production dependencies only
|
205
|
+
4. Provide a simplified package.json for end users
|
206
|
+
|
207
|
+
The distribution approach allows for shipping compiled JavaScript without exposing source code while maintaining functionality.
|
package/README.md
ADDED
@@ -0,0 +1,220 @@
|
|
1
|
+
# Version 2.0 is here
|
2
|
+
|
3
|
+
# Google Search MCP Server
|
4
|
+
An MCP (Model Context Protocol) server that provides Google search capabilities and webpage content analysis tools. This server enables AI models to perform Google searches and analyze webpage content programmatically.
|
5
|
+
|
6
|
+
## Features
|
7
|
+
|
8
|
+
- Google Custom Search integration
|
9
|
+
- Advanced search features (filters, sorting, pagination, categorization)
|
10
|
+
- Webpage content analysis in multiple formats (markdown, HTML, plain text)
|
11
|
+
- Batch webpage analysis
|
12
|
+
- Result categorization and classification
|
13
|
+
- Content summarization
|
14
|
+
- Optimized, human-readable responses
|
15
|
+
- MCP-compliant interface
|
16
|
+
|
17
|
+
## Prerequisites
|
18
|
+
|
19
|
+
- Node.js (v16 or higher)
|
20
|
+
- Google Cloud Platform account
|
21
|
+
- Custom Search Engine ID
|
22
|
+
- Google API Key
|
23
|
+
|
24
|
+
## Installation
|
25
|
+
|
26
|
+
1. Clone the repository
|
27
|
+
2. Install Node.js dependencies:
|
28
|
+
```bash
|
29
|
+
npm install
|
30
|
+
```
|
31
|
+
3. Build the TypeScript code:
|
32
|
+
```bash
|
33
|
+
npm run build
|
34
|
+
```
|
35
|
+
|
36
|
+
## Configuration
|
37
|
+
|
38
|
+
1. Set up environment variables for your Google API credentials:
|
39
|
+
|
40
|
+
You can either set these as system environment variables or configure them in your MCP settings file.
|
41
|
+
|
42
|
+
Required environment variables:
|
43
|
+
- `GOOGLE_API_KEY`: Your Google API key
|
44
|
+
- `GOOGLE_SEARCH_ENGINE_ID`: Your Custom Search Engine ID
|
45
|
+
|
46
|
+
2. Add the server configuration to your MCP settings file (typically located at `%APPDATA%/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`):
|
47
|
+
```json
|
48
|
+
{
|
49
|
+
"mcpServers": {
|
50
|
+
"google-search": {
|
51
|
+
"autoApprove": [
|
52
|
+
"google_search",
|
53
|
+
"extract_webpage_content",
|
54
|
+
"extract_multiple_webpages"
|
55
|
+
],
|
56
|
+
"disabled": false,
|
57
|
+
"timeout": 60,
|
58
|
+
"command": "node",
|
59
|
+
"args": [
|
60
|
+
"/path/to/google-search-mcp-server/dist/google-search.js"
|
61
|
+
],
|
62
|
+
"env": {
|
63
|
+
"GOOGLE_API_KEY": "your-google-api-key",
|
64
|
+
"GOOGLE_SEARCH_ENGINE_ID": "your-custom-search-engine-id"
|
65
|
+
},
|
66
|
+
"transportType": "stdio"
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|
70
|
+
```
|
71
|
+
|
72
|
+
## Running
|
73
|
+
|
74
|
+
Start the MCP server:
|
75
|
+
```bash
|
76
|
+
npm run start
|
77
|
+
```
|
78
|
+
|
79
|
+
## Available Tools
|
80
|
+
|
81
|
+
### 1. google_search
|
82
|
+
Search Google and return relevant results from the web. This tool finds web pages, articles, and information on specific topics using Google's search engine.
|
83
|
+
|
84
|
+
```typescript
|
85
|
+
{
|
86
|
+
"name": "google_search",
|
87
|
+
"arguments": {
|
88
|
+
"query": "your search query",
|
89
|
+
"num_results": 5, // optional, default: 5
|
90
|
+
"site": "example.com", // optional, limit results to specific website
|
91
|
+
"language": "en", // optional, filter by language (ISO 639-1 code)
|
92
|
+
"dateRestrict": "m6", // optional, filter by date (e.g., "m6" for last 6 months)
|
93
|
+
"exactTerms": "exact phrase", // optional, search for exact phrase
|
94
|
+
"resultType": "news", // optional, specify type (news, images, videos)
|
95
|
+
"page": 2, // optional, page number for pagination (starts at 1)
|
96
|
+
"resultsPerPage": 10, // optional, results per page (max: 10)
|
97
|
+
"sort": "date" // optional, sort by "date" or "relevance" (default)
|
98
|
+
}
|
99
|
+
}
|
100
|
+
```
|
101
|
+
|
102
|
+
Response includes:
|
103
|
+
- Search results with title, link, snippet in a readable format
|
104
|
+
- Pagination information (current page, total results, etc.)
|
105
|
+
- Categories of results (automatically detected)
|
106
|
+
- Navigation hints for pagination
|
107
|
+
|
108
|
+
### 2. extract_webpage_content
|
109
|
+
Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter.
|
110
|
+
|
111
|
+
```typescript
|
112
|
+
{
|
113
|
+
"name": "extract_webpage_content",
|
114
|
+
"arguments": {
|
115
|
+
"url": "https://example.com",
|
116
|
+
"format": "markdown" // optional, format options: "markdown" (default), "html", or "text"
|
117
|
+
}
|
118
|
+
}
|
119
|
+
```
|
120
|
+
|
121
|
+
Response includes:
|
122
|
+
- Title and description of the webpage
|
123
|
+
- Content statistics (word count, character count)
|
124
|
+
- Content summary
|
125
|
+
- Content preview (first 500 characters)
|
126
|
+
|
127
|
+
### 3. extract_multiple_webpages
|
128
|
+
Extract and analyze content from multiple webpages in a single request. Ideal for comparing information across different sources or gathering comprehensive information on a topic.
|
129
|
+
|
130
|
+
```typescript
|
131
|
+
{
|
132
|
+
"name": "extract_multiple_webpages",
|
133
|
+
"arguments": {
|
134
|
+
"urls": [
|
135
|
+
"https://example1.com",
|
136
|
+
"https://example2.com"
|
137
|
+
],
|
138
|
+
"format": "html" // optional, format options: "markdown" (default), "html", or "text"
|
139
|
+
}
|
140
|
+
}
|
141
|
+
```
|
142
|
+
|
143
|
+
Response includes:
|
144
|
+
- Title and description of each webpage
|
145
|
+
- Content statistics for each webpage
|
146
|
+
- Content summary for each webpage
|
147
|
+
- Content preview for each webpage (first 150 characters)
|
148
|
+
|
149
|
+
## Getting Google API Credentials
|
150
|
+
|
151
|
+
1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
|
152
|
+
2. Create a new project or select an existing one
|
153
|
+
3. Enable the Custom Search API
|
154
|
+
4. Create API credentials (API Key)
|
155
|
+
5. Go to the [Custom Search Engine](https://programmablesearchengine.google.com/about/) page
|
156
|
+
6. Create a new search engine and get your Search Engine ID
|
157
|
+
7. Add these credentials to your MCP settings file or set them as environment variables
|
158
|
+
|
159
|
+
## Error Handling
|
160
|
+
|
161
|
+
The server provides detailed error messages for:
|
162
|
+
- Missing or invalid API credentials
|
163
|
+
- Failed search requests
|
164
|
+
- Invalid webpage URLs
|
165
|
+
- Network connectivity issues
|
166
|
+
|
167
|
+
## Architecture
|
168
|
+
|
169
|
+
The server is built with TypeScript and uses the MCP SDK to provide a standardized interface for AI models to interact with Google Search and webpage content analysis tools. It consists of two main services:
|
170
|
+
|
171
|
+
1. **GoogleSearchService**: Handles Google API interactions for search functionality
|
172
|
+
2. **ContentExtractor**: Manages webpage content analysis and extraction
|
173
|
+
|
174
|
+
The server uses caching mechanisms to improve performance and reduce API calls.
|
175
|
+
|
176
|
+
## Distributing the Built Version
|
177
|
+
|
178
|
+
If you prefer to distribute only the built version of this tool rather than the source code, you can follow these steps:
|
179
|
+
|
180
|
+
1. Build the TypeScript code:
|
181
|
+
```bash
|
182
|
+
npm run build
|
183
|
+
```
|
184
|
+
|
185
|
+
2. Create a distribution package with only the necessary files:
|
186
|
+
```bash
|
187
|
+
# Create a distribution directory
|
188
|
+
mkdir -p dist-package
|
189
|
+
|
190
|
+
# Copy the compiled JavaScript files
|
191
|
+
cp -r dist dist-package/
|
192
|
+
|
193
|
+
# Copy package files (dev dependencies are stripped from package.json in the next step)
|
194
|
+
cp package.json dist-package/
|
195
|
+
cp README.md dist-package/
|
196
|
+
|
197
|
+
# Create a simplified package.json for distribution
|
198
|
+
node -e "const pkg = require('./package.json'); delete pkg.devDependencies; delete pkg.scripts.build; delete pkg.scripts.dev; pkg.scripts.start = 'node dist/google-search.js'; require('fs').writeFileSync('dist-package/package.json', JSON.stringify(pkg, null, 2));"
|
199
|
+
```
|
200
|
+
|
201
|
+
3. Users can then install and run the built version:
|
202
|
+
```bash
|
203
|
+
# Install production dependencies only
|
204
|
+
npm install --production
|
205
|
+
|
206
|
+
# Start the server
|
207
|
+
npm start
|
208
|
+
```
|
209
|
+
|
210
|
+
This approach allows you to distribute the compiled JavaScript files without exposing the TypeScript source code. Users will still need to:
|
211
|
+
|
212
|
+
1. Configure their Google API credentials as environment variables
|
213
|
+
2. Add the server configuration to their MCP settings file
|
214
|
+
3. Install the production dependencies
|
215
|
+
|
216
|
+
Note that the package.json in the distribution will only include production dependencies and a simplified set of scripts.
|
217
|
+
|
218
|
+
## License
|
219
|
+
|
220
|
+
MIT
|
@@ -0,0 +1,36 @@
|
|
1
|
+
import axios from 'axios';
|
2
|
+
/**
 * Converts any value caught from a failed request into an Error whose message
 * starts with `prefix`. The caught value is attached as `cause` so the
 * original stack trace and response details are not lost.
 *
 * @param {string} prefix - Human-readable description of the failed operation.
 * @param {unknown} error - The value caught from the failed request.
 * @returns {Error} The wrapped error, ready to be thrown.
 */
function describeFetchFailure(prefix, error) {
    if (axios.isAxiosError(error)) {
        // Prefer the `error` field of the response body when it is present
        // and truthy; otherwise fall back to axios' own message.
        const detail = error.response?.data?.error || error.message;
        return new Error(`${prefix}: ${detail}`, { cause: error });
    }
    if (error instanceof Error) {
        return new Error(`${prefix}: ${error.message}`, { cause: error });
    }
    // Something that is not an Error was thrown; there is no message to reuse.
    return new Error(`${prefix}: Unknown error`, { cause: error });
}

/**
 * POSTs `payload` to `endpoint` and resolves with the response body.
 *
 * @param {string} endpoint - Absolute URL to post to.
 * @param {object} payload - Request body.
 * @param {string} failurePrefix - Message prefix used if the request fails.
 * @returns {Promise<*>} The `data` property of the axios response.
 * @throws {Error} Wrapped error (see describeFetchFailure) when the request fails.
 */
async function postForData(endpoint, payload, failurePrefix) {
    try {
        const response = await axios.post(endpoint, payload);
        return response.data;
    }
    catch (error) {
        throw describeFetchFailure(failurePrefix, error);
    }
}

/**
 * Thin HTTP client for an analysis service listening on localhost.
 * It posts to the service's `/analyze` and `/batch_analyze` endpoints.
 */
export class ContentFetcher {
    /**
     * @param {number} [port=5001] - Local port the analysis service listens on.
     */
    constructor(port = 5001) {
        this.baseUrl = `http://localhost:${port}`;
    }

    /**
     * Requests analysis of a single page.
     *
     * @param {string} url - Address of the page to analyze.
     * @returns {Promise<*>} The response body returned by `/analyze`.
     * @throws {Error} "Failed to fetch content: ..." when the request fails.
     */
    async fetchContent(url) {
        return postForData(`${this.baseUrl}/analyze`, { url }, 'Failed to fetch content');
    }

    /**
     * Requests analysis of several pages in one call.
     *
     * @param {string[]} urls - Addresses of the pages to analyze.
     * @returns {Promise<*>} The response body returned by `/batch_analyze`.
     * @throws {Error} "Failed to batch fetch content: ..." when the request fails.
     */
    async batchFetchContent(urls) {
        return postForData(`${this.baseUrl}/batch_analyze`, { urls }, 'Failed to batch fetch content');
    }
}
|