pse-mcp 0.1.0 → 0.1.2

This diff compares the contents of package versions as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
package/src/google-search.ts CHANGED
@@ -4,17 +4,13 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
 import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
 import { GoogleSearchService } from './services/google-search.service.js';
-import { ContentExtractor } from './services/content-extractor.service.js';
-import { OutputFormat } from './types.js';
 
 class GoogleSearchServer {
   private server: Server;
   private searchService: GoogleSearchService;
-  private contentExtractor: ContentExtractor;
 
   constructor() {
     this.searchService = new GoogleSearchService();
-    this.contentExtractor = new ContentExtractor();
     this.server = new Server(
       {
         name: 'google-search',
@@ -34,7 +30,7 @@ class GoogleSearchServer {
           },
           num_results: {
             type: 'number',
-            description: 'Number of results to return (default: 5, max: 10). Increase for broader coverage, decrease for faster response.'
+            description: 'Number of results to return (default: 10, max: 10). Increase for broader coverage, decrease for faster response.'
           },
           site: {
             type: 'string',
@@ -62,7 +58,7 @@ class GoogleSearchServer {
           },
           resultsPerPage: {
            type: 'number',
-            description: 'Number of results to show per page (default: 5, max: 10). Controls how many results are returned for each page.'
+            description: 'Number of results to show per page (default: 10, max: 10). Controls how many results are returned for each page.'
          },
          sort: {
            type: 'string',
@@ -71,41 +67,6 @@ class GoogleSearchServer {
          },
          required: ['query']
        }
-      },
-      extract_webpage_content: {
-        description: 'Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter. Use it to get detailed information from specific pages found via google_search. Works with most common webpage formats including articles, blogs, and documentation.',
-        inputSchema: {
-          type: 'object',
-          properties: {
-            url: {
-              type: 'string',
-              description: 'Full URL of the webpage to extract content from (must start with http:// or https://). Ensure the URL is from a public webpage and not behind authentication.'
-            },
-            format: {
-              type: 'string',
-              description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".'
-            }
-          },
-          required: ['url']
-        }
-      },
-      extract_multiple_webpages: {
-        description: 'Extract and analyze content from multiple webpages in a single request. This tool is ideal for comparing information across different sources or gathering comprehensive information on a topic. Limited to 5 URLs per request to maintain performance.',
-        inputSchema: {
-          type: 'object',
-          properties: {
-            urls: {
-              type: 'array',
-              items: { type: 'string' },
-              description: 'Array of webpage URLs to extract content from. Each URL must be public and start with http:// or https://. Maximum 5 URLs per request.'
-            },
-            format: {
-              type: 'string',
-              description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".'
-            }
-          },
-          required: ['urls']
-        }
       }
     }
   }
@@ -126,7 +87,7 @@ class GoogleSearchServer {
         },
         num_results: {
           type: 'number',
-          description: 'Number of results to return (default: 5, max: 10). Increase for broader coverage, decrease for faster response.'
+          description: 'Number of results to return (default: 10, max: 10). Increase for broader coverage, decrease for faster response.'
         },
         site: {
           type: 'string',
@@ -154,7 +115,7 @@ class GoogleSearchServer {
         },
         resultsPerPage: {
           type: 'number',
-          description: 'Number of results to show per page (default: 5, max: 10). Controls how many results are returned for each page.'
+          description: 'Number of results to show per page (default: 10, max: 10). Controls how many results are returned for each page.'
         },
         sort: {
           type: 'string',
@@ -163,43 +124,6 @@ class GoogleSearchServer {
         },
         required: ['query']
       }
-    },
-    {
-      name: 'extract_webpage_content',
-      description: 'Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter. Use it to get detailed information from specific pages found via google_search. Works with most common webpage formats including articles, blogs, and documentation.',
-      inputSchema: {
-        type: 'object',
-        properties: {
-          url: {
-            type: 'string',
-            description: 'Full URL of the webpage to extract content from (must start with http:// or https://). Ensure the URL is from a public webpage and not behind authentication.'
-          },
-          format: {
-            type: 'string',
-            description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".'
-          }
-        },
-        required: ['url']
-      }
-    },
-    {
-      name: 'extract_multiple_webpages',
-      description: 'Extract and analyze content from multiple webpages in a single request. This tool is ideal for comparing information across different sources or gathering comprehensive information on a topic. Limited to 5 URLs per request to maintain performance.',
-      inputSchema: {
-        type: 'object',
-        properties: {
-          urls: {
-            type: 'array',
-            items: { type: 'string' },
-            description: 'Array of webpage URLs to extract content from. Each URL must be public and start with http:// or https://. Maximum 5 URLs per request.'
-          },
-          format: {
-            type: 'string',
-            description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".'
-          }
-        },
-        required: ['urls']
-      }
     }
   ]
 }));
@@ -226,24 +150,6 @@ class GoogleSearchServer {
        }
        throw new Error('Invalid arguments for google_search tool');
 
-      case 'extract_webpage_content':
-        if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'url' in request.params.arguments) {
-          return this.handleAnalyzeWebpage({
-            url: String(request.params.arguments.url),
-            format: request.params.arguments.format ? String(request.params.arguments.format) as OutputFormat : 'markdown'
-          });
-        }
-        throw new Error('Invalid arguments for extract_webpage_content tool');
-
-      case 'extract_multiple_webpages':
-        if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'urls' in request.params.arguments && Array.isArray(request.params.arguments.urls)) {
-          return this.handleBatchAnalyzeWebpages({
-            urls: request.params.arguments.urls.map(String),
-            format: request.params.arguments.format ? String(request.params.arguments.format) as OutputFormat : 'markdown'
-          });
-        }
-        throw new Error('Invalid arguments for extract_multiple_webpages tool');
-
       default:
         throw new Error(`Unknown tool: ${request.params.name}`);
     }
@@ -334,122 +240,6 @@ class GoogleSearchServer {
     }
   }
 
-  private async handleAnalyzeWebpage(args: { url: string; format?: OutputFormat; summarize?: boolean }) {
-    try {
-      const content = await this.contentExtractor.extractContent(args.url, args.format);
-
-      // Format the response in a more readable, concise way
-      let responseText = `Content from: ${content.url}\n\n`;
-      responseText += `Title: ${content.title}\n`;
-
-      if (content.description) {
-        responseText += `Description: ${content.description}\n`;
-      }
-
-      responseText += `\nStats: ${content.stats.word_count} words, ${content.stats.approximate_chars} characters\n\n`;
-
-      // Add the summary if available
-      if (content.summary) {
-        responseText += `Summary: ${content.summary}\n\n`;
-      }
-
-      // Add a preview of the content
-      responseText += `Content Preview:\n${content.content_preview.first_500_chars}\n\n`;
-
-      // Add a note about requesting specific information
-      responseText += `Note: This is a preview of the content. For specific information, please ask about particular aspects of this webpage.`;
-
-      return {
-        content: [
-          {
-            type: 'text',
-            text: responseText,
-          },
-        ],
-      };
-    } catch (error) {
-      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
-      const helpText = 'Common issues:\n- Check if the URL is accessible in a browser\n- Ensure the webpage is public\n- Try again if it\'s a temporary network issue';
-
-      return {
-        content: [
-          {
-            type: 'text',
-            text: `${errorMessage}\n\n${helpText}`,
-          },
-        ],
-        isError: true,
-      };
-    }
-  }
-
-  private async handleBatchAnalyzeWebpages(args: { urls: string[]; format?: OutputFormat }) {
-    if (args.urls.length > 5) {
-      return {
-        content: [{
-          type: 'text',
-          text: 'Maximum 5 URLs allowed per request to maintain performance. Please reduce the number of URLs.'
-        }],
-        isError: true
-      };
-    }
-
-    try {
-      const results = await this.contentExtractor.batchExtractContent(args.urls, args.format);
-
-      // Format the response in a more readable, concise way
-      let responseText = `Content from ${args.urls.length} webpages:\n\n`;
-
-      for (const [url, result] of Object.entries(results)) {
-        responseText += `URL: ${url}\n`;
-
-        if ('error' in result) {
-          responseText += `Error: ${result.error}\n\n`;
-          continue;
-        }
-
-        responseText += `Title: ${result.title}\n`;
-
-        if (result.description) {
-          responseText += `Description: ${result.description}\n`;
-        }
-
-        responseText += `Stats: ${result.stats.word_count} words\n`;
-
-        // Add summary if available
-        if (result.summary) {
-          responseText += `Summary: ${result.summary}\n`;
-        }
-
-        responseText += `Preview: ${result.content_preview.first_500_chars.substring(0, 150)}...\n\n`;
-      }
-
-      responseText += `Note: These are previews of the content. To analyze the full content of a specific URL, use the extract_webpage_content tool with that URL.`;
-
-      return {
-        content: [
-          {
-            type: 'text',
-            text: responseText,
-          },
-        ],
-      };
-    } catch (error) {
-      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
-      const helpText = 'Common issues:\n- Check if all URLs are accessible in a browser\n- Ensure all webpages are public\n- Try again if it\'s a temporary network issue\n- Consider reducing the number of URLs';
-
-      return {
-        content: [
-          {
-            type: 'text',
-            text: `${errorMessage}\n\n${helpText}`,
-          },
-        ],
-        isError: true,
-      };
-    }
-  }
-
   async start() {
     try {
       const transport = new StdioServerTransport();
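With the extraction tools removed, `google_search` is the only tool this server still registers. A minimal client-side sketch of calling it over stdio, assuming the client API of recent `@modelcontextprotocol/sdk` versions (`Client`, `StdioClientTransport`, `callTool`); the server path and query are illustrative, not from the package:

```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

// Spawn the built server over stdio (illustrative path).
const transport = new StdioClientTransport({
  command: 'node',
  args: ['dist/google-search.js'],
  env: {
    GOOGLE_API_KEY: process.env.GOOGLE_API_KEY ?? '',
    GOOGLE_SEARCH_ENGINE_ID: process.env.GOOGLE_SEARCH_ENGINE_ID ?? '',
  },
});

const client = new Client({ name: 'example-client', version: '0.1.0' });
await client.connect(transport);

// num_results omitted: as of 0.1.2 the server defaults it to 10 (the max).
const result = await client.callTool({
  name: 'google_search',
  arguments: { query: 'model context protocol' },
});
console.log(result.content);
```

A 0.1.0 client that auto-approved or called `extract_webpage_content` or `extract_multiple_webpages` will now hit the `Unknown tool` error path shown in the handler above.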
package/src/services/google-search.service.ts CHANGED
@@ -79,7 +79,7 @@ export class GoogleSearchService {
     }
   }
 
-  async search(query: string, numResults: number = 5, filters?: SearchFilters): Promise<{
+  async search(query: string, numResults: number = 10, filters?: SearchFilters): Promise<{
     results: SearchResult[];
     pagination?: SearchPaginationInfo;
     categories?: CategoryInfo[];
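This one-line service change is what the description edits earlier in the diff reflect: the default `numResults` moves from 5 to 10, so the schema text and the implementation now agree. A hedged sketch of the observable difference for code that uses the service class directly (import path assumes the source layout shown in the deleted QWEN.md below; the query is illustrative):

```typescript
// Hypothetical direct use of the service class; requires GOOGLE_API_KEY
// and GOOGLE_SEARCH_ENGINE_ID to be set in the environment.
import { GoogleSearchService } from './services/google-search.service.js';

const service = new GoogleSearchService();

// 0.1.0: this call requested 5 results; 0.1.2: it requests 10 (the documented max).
const { results } = await service.search('typescript mcp server');
console.log(`got ${results.length} results`); // up to 10
```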
package/src/types.ts CHANGED
@@ -40,25 +40,3 @@ export interface SearchResponse {
   categories?: CategoryInfo[];
 }
 
-export type OutputFormat = 'markdown' | 'html' | 'text';
-
-export interface WebpageContent {
-  url: string;
-  title: string;
-  description: string;
-  content: string;
-  format: OutputFormat;
-  meta_tags: Record<string, string>;
-  stats: {
-    word_count: number;
-    approximate_chars: number;
-  };
-  content_preview: {
-    first_500_chars: string;
-  };
-  summary?: string;
-}
-
-export interface WebpageAnalysisResponse {
-  [url: string]: WebpageContent | { error: string };
-}
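Because `OutputFormat`, `WebpageContent`, and `WebpageAnalysisResponse` are dropped from the public types, downstream TypeScript that imported them from this package stops compiling at 0.1.2. If those shapes are still needed, they can be copied locally; this is a verbatim reproduction of the definitions deleted above:

```typescript
// Local copies of the types removed from src/types.ts in 0.1.2.
export type OutputFormat = 'markdown' | 'html' | 'text';

export interface WebpageContent {
  url: string;
  title: string;
  description: string;
  content: string;
  format: OutputFormat;
  meta_tags: Record<string, string>;
  stats: {
    word_count: number;
    approximate_chars: number;
  };
  content_preview: {
    first_500_chars: string;
  };
  summary?: string;
}

export interface WebpageAnalysisResponse {
  [url: string]: WebpageContent | { error: string };
}
```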
package/GEMINI.md DELETED
@@ -1,72 +0,0 @@
-# GEMINI.md
-
-## Project Overview
-
-This project is a TypeScript-based MCP (Model Context Protocol) server that provides Google search capabilities and webpage content analysis tools. It enables AI models to programmatically perform Google searches and analyze webpage content.
-
-The server is built with TypeScript and utilizes the `@modelcontextprotocol/sdk` for MCP communication. It exposes three main tools: `google_search`, `extract_webpage_content`, and `extract_multiple_webpages`.
-
-The core logic is divided into two services:
-- `GoogleSearchService`: Handles interactions with the Google Custom Search API, including caching, pagination, and result categorization.
-- `ContentExtractor`: Fetches and extracts the main content from webpages using `axios`, `cheerio`, and `@mozilla/readability`. It supports various output formats and caching.
-
-## Building and Running
-
-### Prerequisites
-
-- Node.js (v16 or higher)
-- Google Cloud Platform account
-- Custom Search Engine ID
-- Google API Key
-
-### Installation
-
-1. Install Node.js dependencies:
-```bash
-npm install
-```
-
-### Environment Variables
-
-The following environment variables must be set:
-
-- `GOOGLE_API_KEY`: Your Google API key.
-- `GOOGLE_SEARCH_ENGINE_ID`: Your Custom Search Engine ID.
-
-### Building
-
-To build the TypeScript code, run:
-
-```bash
-npm run build
-```
-
-This will compile the TypeScript files from `src` into JavaScript files in the `dist` directory.
-
-### Running
-
-To start the MCP server, run:
-
-```bash
-npm run start
-```
-
-This will execute the compiled `dist/google-search.js` file.
-
-For development, you can use:
-
-```bash
-npm run dev
-```
-
-This will watch for changes in the `src` directory and automatically recompile the code.
-
-## Development Conventions
-
-- **Language:** TypeScript
-- **Module System:** ES Modules (`"type": "module"` in `package.json`)
-- **Compiler Target:** ES2020
-- **Code Style:** The code is well-structured and follows standard TypeScript conventions. It uses classes for services and the main server logic.
-- **Error Handling:** The code includes error handling for API requests and other operations.
-- **Caching:** Caching is implemented for both search results and webpage content to improve performance and reduce API calls.
-- **Testing:** There are no explicit test files in the provided structure.
package/QWEN.md DELETED
@@ -1,207 +0,0 @@
-# Google Search MCP Server - Project Context
-
-## Project Overview
-
-The Google Search MCP Server is a Model Context Protocol (MCP) server that provides Google search capabilities and webpage content analysis tools. This server enables AI models to perform Google searches and analyze webpage content programmatically through a standardized MCP interface.
-
-### Key Features
-- Google Custom Search integration
-- Advanced search features (filters, sorting, pagination, categorization)
-- Webpage content analysis in multiple formats (markdown, HTML, plain text)
-- Batch webpage analysis
-- Result categorization and classification
-- Content summarization
-- Optimized, human-readable responses
-- MCP-compliant interface
-
-### Technology Stack
-- **Language**: TypeScript
-- **Framework**: Node.js
-- **Core Dependencies**:
-  - `@modelcontextprotocol/sdk`: MCP server SDK
-  - `googleapis`: Google API integration
-  - `@mozilla/readability`: Content extraction
-  - `cheerio`: HTML parsing
-  - `jsdom`: DOM manipulation
-  - `turndown`: HTML to Markdown conversion
-  - `axios`: HTTP client
-  - `express`: Web framework (if needed)
-
-## Project Structure
-
-```
-D:\ai\pse-mcp\
-├── dist/              # Compiled JavaScript files
-├── dist-package/      # Distribution package
-├── MCP Documents/     # MCP protocol documentation
-├── src/               # Source TypeScript files
-│   ├── services/      # Service implementations
-│   │   ├── google-search.service.ts
-│   │   └── content-extractor.service.ts
-│   ├── google-search.ts   # Main server entry point
-│   ├── mcp.d.ts           # MCP type definitions
-│   └── types.ts           # Type definitions
-├── package.json       # Project dependencies and scripts
-├── tsconfig.json      # TypeScript configuration
-├── README.md          # Project documentation
-└── ...
-```
-
-## Building and Running
-
-### Prerequisites
-- Node.js (v16 or higher)
-- Google Cloud Platform account
-- Custom Search Engine ID
-- Google API Key
-
-### Setup Commands
-1. Install dependencies:
-```bash
-npm install
-```
-
-2. Build the TypeScript code:
-```bash
-npm run build
-```
-
-3. Run the server:
-```bash
-npm run start
-```
-
-### Environment Variables
-Required environment variables:
-- `GOOGLE_API_KEY`: Your Google API key
-- `GOOGLE_SEARCH_ENGINE_ID`: Your Custom Search Engine ID
-
-### Development Scripts
-- `npm run build`: Compiles TypeScript to JavaScript
-- `npm run start`: Runs the compiled server
-- `npm run dev`: Watches for changes and recompiles (tsc -w)
-
-## Architecture and Components
-
-### Main Components
-1. **GoogleSearchService**: Handles Google API interactions for search functionality
-2. **ContentExtractor**: Manages webpage content analysis and extraction
-3. **Main Server (google-search.ts)**: MCP server implementation with tool handlers
-
-### Services
-
-#### GoogleSearchService
-- Integrates with Google Custom Search API
-- Provides caching mechanism (5-minute TTL, max 100 entries)
-- Implements advanced search filtering and pagination
-- Categorizes search results by content type
-- Handles error management and response formatting
-
-#### ContentExtractor
-- Extracts webpage content using Mozilla Readability
-- Converts content to markdown, HTML, or plain text formats
-- Implements content caching (30-minute TTL, max 50 entries)
-- Generates content summaries and statistics
-- Handles batch processing of multiple webpages
-
-### Available Tools
-
-#### 1. google_search
-Searches Google and returns relevant results with advanced filtering options:
-- Query string (required)
-- Number of results (default: 5, max: 10)
-- Site filtering
-- Language filtering (ISO 639-1 codes)
-- Date restrictions
-- Exact phrase matching
-- Result type (news, images, videos)
-- Pagination support
-- Sorting (relevance or date)
-
-#### 2. extract_webpage_content
-Extracts and analyzes content from a single webpage:
-- URL (required)
-- Output format (markdown, html, text)
-- Removes ads, navigation, and clutter
-- Returns title, description, content stats, and summary
-
-#### 3. extract_multiple_webpages
-Extracts content from multiple webpages in a single request:
-- Array of URLs (max 5 per request)
-- Output format (markdown, html, text)
-- Batch processing capability
-
-## Configuration
-
-The server configuration needs to be added to the MCP settings file (typically located at `%APPDATA%/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`):
-
-```json
-{
-  "mcpServers": {
-    "google-search": {
-      "autoApprove": [
-        "google_search",
-        "extract_webpage_content",
-        "extract_multiple_webpages"
-      ],
-      "disabled": false,
-      "timeout": 60,
-      "command": "node",
-      "args": [
-        "/path/to/google-search-mcp-server/dist/google-search.js"
-      ],
-      "env": {
-        "GOOGLE_API_KEY": "your-google-api-key",
-        "GOOGLE_SEARCH_ENGINE_ID": "your-custom-search-engine-id"
-      },
-      "transportType": "stdio"
-    }
-  }
-}
-```
-
-## Development Conventions
-
-### Coding Standards
-- TypeScript with strict mode enabled
-- Use of async/await for asynchronous operations
-- Proper error handling with descriptive messages
-- Input validation for all tool arguments
-- Caching strategies for performance optimization
-
-### Type Safety
-- Comprehensive type definitions in `types.ts`
-- Strict typing for all function parameters and return values
-- MCP request/response schema validation
-
-### Caching Strategy
-- Search results: 5-minute TTL with max 100 entries
-- Webpage content: 30-minute TTL with max 50 entries
-- Cache keys generated from request parameters
-- Automatic cleanup of oldest entries when limits exceeded
-
-## MCP Protocol Integration
-
-The server implements the Model Context Protocol with:
-- Standardized tool listing and calling
-- Input schema validation
-- Error response formatting
-- Stdio transport for communication with MCP clients
-
-## Testing and Verification
-
-To verify the server is working:
-1. Build with `npm run build`
-2. Start with `npm run start`
-3. Use MCP client to call the available tools
-4. Verify search results and content extraction work as expected
-
-## Deployment
-
-For distribution, the project includes a process to create a compiled distribution package:
-1. Build the TypeScript code
-2. Create a distribution package with only necessary files
-3. Include production dependencies only
-4. Simplified package.json for end users
-
-The distribution approach allows for shipping compiled JavaScript without exposing source code while maintaining functionality.
@@ -1,36 +0,0 @@
-import axios from 'axios';
-export class ContentFetcher {
-    constructor(port = 5001) {
-        this.baseUrl = `http://localhost:${port}`;
-    }
-    async fetchContent(url) {
-        try {
-            const response = await axios.post(`${this.baseUrl}/analyze`, { url });
-            return response.data;
-        }
-        catch (error) {
-            if (axios.isAxiosError(error)) {
-                throw new Error(`Failed to fetch content: ${error.response?.data?.error || error.message}`);
-            }
-            if (error instanceof Error) {
-                throw new Error(`Failed to fetch content: ${error.message}`);
-            }
-            throw new Error('Failed to fetch content: Unknown error');
-        }
-    }
-    async batchFetchContent(urls) {
-        try {
-            const response = await axios.post(`${this.baseUrl}/batch_analyze`, { urls });
-            return response.data;
-        }
-        catch (error) {
-            if (axios.isAxiosError(error)) {
-                throw new Error(`Failed to batch fetch content: ${error.response?.data?.error || error.message}`);
-            }
-            if (error instanceof Error) {
-                throw new Error(`Failed to batch fetch content: ${error.message}`);
-            }
-            throw new Error('Failed to batch fetch content: Unknown error');
-        }
-    }
-}