pse-mcp 0.1.0 → 0.1.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,220 +1,104 @@
1
- # Version 2.0 is here
2
-
3
- # Google Search MCP Server
4
- An MCP (Model Context Protocol) server that provides Google search capabilities and webpage content analysis tools. This server enables AI models to perform Google searches and analyze webpage content programmatically.
5
-
6
- ## Features
7
-
8
- - Google Custom Search integration
9
- - Advanced search features (filters, sorting, pagination, categorization)
10
- - Webpage content analysis in multiple formats (markdown, HTML, plain text)
11
- - Batch webpage analysis
12
- - Result categorization and classification
13
- - Content summarization
14
- - Optimized, human-readable responses
15
- - MCP-compliant interface
16
-
17
- ## Prerequisites
18
-
19
- - Node.js (v16 or higher)
20
- - Google Cloud Platform account
21
- - Custom Search Engine ID
22
- - Google API Key
23
-
24
- ## Installation
25
-
26
- 1. Clone the repository
27
- 2. Install Node.js dependencies:
28
- ```bash
29
- npm install
30
- ```
31
- 3. Build the TypeScript code:
32
- ```bash
33
- npm run build
34
- ```
35
-
36
- ## Configuration
37
-
38
- 1. Set up environment variables for your Google API credentials:
39
-
40
- You can either set these as system environment variables or configure them in your MCP settings file.
41
-
42
- Required environment variables:
43
- - `GOOGLE_API_KEY`: Your Google API key
44
- - `GOOGLE_SEARCH_ENGINE_ID`: Your Custom Search Engine ID
45
-
46
- 2. Add the server configuration to your MCP settings file (typically located at `%APPDATA%/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`):
47
- ```json
48
- {
49
- "mcpServers": {
50
- "google-search": {
51
- "autoApprove": [
52
- "google_search",
53
- "extract_webpage_content",
54
- "extract_multiple_webpages"
55
- ],
56
- "disabled": false,
57
- "timeout": 60,
58
- "command": "node",
59
- "args": [
60
- "/path/to/google-search-mcp-server/dist/google-search.js"
61
- ],
62
- "env": {
63
- "GOOGLE_API_KEY": "your-google-api-key",
64
- "GOOGLE_SEARCH_ENGINE_ID": "your-custom-search-engine-id"
65
- },
66
- "transportType": "stdio"
67
- }
68
- }
69
- }
70
- ```
71
-
72
- ## Running
73
-
74
- Start the MCP server:
75
- ```bash
76
- npm run start
77
- ```
78
-
79
- ## Available Tools
80
-
81
- ### 1. google_search
82
- Search Google and return relevant results from the web. This tool finds web pages, articles, and information on specific topics using Google's search engine.
83
-
84
- ```typescript
85
- {
86
- "name": "google_search",
87
- "arguments": {
88
- "query": "your search query",
89
- "num_results": 5, // optional, default: 5
90
- "site": "example.com", // optional, limit results to specific website
91
- "language": "en", // optional, filter by language (ISO 639-1 code)
92
- "dateRestrict": "m6", // optional, filter by date (e.g., "m6" for last 6 months)
93
- "exactTerms": "exact phrase", // optional, search for exact phrase
94
- "resultType": "news", // optional, specify type (news, images, videos)
95
- "page": 2, // optional, page number for pagination (starts at 1)
96
- "resultsPerPage": 10, // optional, results per page (max: 10)
97
- "sort": "date" // optional, sort by "date" or "relevance" (default)
98
- }
99
- }
100
- ```
101
-
102
- Response includes:
103
- - Search results with title, link, snippet in a readable format
104
- - Pagination information (current page, total results, etc.)
105
- - Categories of results (automatically detected)
106
- - Navigation hints for pagination
107
-
108
- ### 2. extract_webpage_content
109
- Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter.
110
-
111
- ```typescript
112
- {
113
- "name": "extract_webpage_content",
114
- "arguments": {
115
- "url": "https://example.com",
116
- "format": "markdown" // optional, format options: "markdown" (default), "html", or "text"
117
- }
118
- }
119
- ```
120
-
121
- Response includes:
122
- - Title and description of the webpage
123
- - Content statistics (word count, character count)
124
- - Content summary
125
- - Content preview (first 500 characters)
126
-
127
- ### 3. extract_multiple_webpages
128
- Extract and analyze content from multiple webpages in a single request. Ideal for comparing information across different sources or gathering comprehensive information on a topic.
129
-
130
- ```typescript
131
- {
132
- "name": "extract_multiple_webpages",
133
- "arguments": {
134
- "urls": [
135
- "https://example1.com",
136
- "https://example2.com"
137
- ],
138
- "format": "html" // optional, format options: "markdown" (default), "html", or "text"
139
- }
140
- }
141
- ```
142
-
143
- Response includes:
144
- - Title and description of each webpage
145
- - Content statistics for each webpage
146
- - Content summary for each webpage
147
- - Content preview for each webpage (first 150 characters)
148
-
149
- ## Getting Google API Credentials
150
-
151
- 1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
152
- 2. Create a new project or select an existing one
153
- 3. Enable the Custom Search API
154
- 4. Create API credentials (API Key)
155
- 5. Go to the [Custom Search Engine](https://programmablesearchengine.google.com/about/) page
156
- 6. Create a new search engine and get your Search Engine ID
157
- 7. Add these credentials to your MCP settings file or set them as environment variables
158
-
159
- ## Error Handling
160
-
161
- The server provides detailed error messages for:
162
- - Missing or invalid API credentials
163
- - Failed search requests
164
- - Invalid webpage URLs
165
- - Network connectivity issues
166
-
167
- ## Architecture
168
-
169
- The server is built with TypeScript and uses the MCP SDK to provide a standardized interface for AI models to interact with Google Search and webpage content analysis tools. It consists of two main services:
170
-
171
- 1. **GoogleSearchService**: Handles Google API interactions for search functionality
172
- 2. **ContentExtractor**: Manages webpage content analysis and extraction
173
-
174
- The server uses caching mechanisms to improve performance and reduce API calls.
175
-
176
- ## Distributing the Built Version
177
-
178
- If you prefer to distribute only the built version of this tool rather than the source code, you can follow these steps:
179
-
180
- 1. Build the TypeScript code:
181
- ```bash
182
- npm run build
183
- ```
184
-
185
- 2. Create a distribution package with only the necessary files:
186
- ```bash
187
- # Create a distribution directory
188
- mkdir -p dist-package
189
-
190
- # Copy the compiled JavaScript files
191
- cp -r dist dist-package/
192
-
193
- # Copy package files (without dev dependencies)
194
- cp package.json dist-package/
195
- cp README.md dist-package/
196
-
197
- # Create a simplified package.json for distribution
198
- node -e "const pkg = require('./package.json'); delete pkg.devDependencies; delete pkg.scripts.build; delete pkg.scripts.dev; pkg.scripts.start = 'node dist/google-search.js'; require('fs').writeFileSync('dist-package/package.json', JSON.stringify(pkg, null, 2));"
199
- ```
200
-
201
- 3. Users can then install and run the built version:
202
- ```bash
203
- # Install production dependencies only
204
- npm install --production
205
-
206
- # Start the server
207
- npm start
208
- ```
209
-
210
- This approach allows you to distribute the compiled JavaScript files without exposing the TypeScript source code. Users will still need to:
211
-
212
- 1. Configure their Google API credentials as environment variables
213
- 2. Add the server configuration to their MCP settings file
214
- 3. Install the production dependencies
215
-
216
- Note that the package.json in the distribution will only include production dependencies and a simplified set of scripts.
217
-
218
- ## License
219
-
220
- MIT
1
+ # Version 2.0 is here
2
+
3
+ # Google Search MCP Server
4
+ An MCP (Model Context Protocol) server that provides Google search capabilities. This server enables AI models to perform Google searches programmatically.
5
+
6
+ ## Features
7
+
8
+ - Google Custom Search integration
9
+ - Advanced search features (filters, sorting, pagination, categorization)
10
+ - Optimized, human-readable responses
11
+ - MCP-compliant interface
12
+
13
+ ## Prerequisites
14
+
15
+ - Node.js (v16 or higher)
16
+ - Google Cloud Platform account
17
+ - Custom Search Engine ID
18
+ - Google API Key
19
+
20
+ ## Configuration
21
+
22
+ 1. Set up environment variables for your Google API credentials:
23
+
24
+ You can either set these as system environment variables or configure them in your MCP settings file.
25
+
26
+ Required environment variables:
27
+ - `GOOGLE_API_KEY`: Your Google API key
28
+ - `GOOGLE_SEARCH_ENGINE_ID`: Your Custom Search Engine ID
29
+
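The two required variables above are read at startup. As an illustrative sketch only (not the package's actual code), a server like this would typically fail fast when either credential is missing; `loadConfig` and its return shape are hypothetical names for this example:

```typescript
// Sketch: validate required Google credentials at startup and fail fast.
// `loadConfig` is a hypothetical helper, not part of pse-mcp's API.
function loadConfig(env: Record<string, string | undefined> = process.env) {
  const apiKey = env.GOOGLE_API_KEY;
  const searchEngineId = env.GOOGLE_SEARCH_ENGINE_ID;
  if (!apiKey || !searchEngineId) {
    throw new Error(
      "Set GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID before starting the server"
    );
  }
  return { apiKey, searchEngineId };
}
```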
30
+ 2. Add the server configuration to your MCP settings file:
31
+ ```json
32
+ {
33
+ "mcpServers": {
34
+ "google-search": {
35
+ "command": "npx",
36
+ "args": [
37
+ "-y",
38
+ "pse-mcp"
39
+ ],
40
+ "env": {
41
+ "GOOGLE_API_KEY": "your-google-api-key",
42
+ "GOOGLE_SEARCH_ENGINE_ID": "your-custom-search-engine-id"
43
+ }
44
+ }
45
+ }
46
+ }
47
+ ```
48
+
49
+ ## Available Tools
50
+
51
+ ### 1. google_search
52
+ Search Google and return relevant results from the web. This tool finds web pages, articles, and information on specific topics using Google's search engine.
53
+
54
+ ```typescript
55
+ {
56
+ "name": "google_search",
57
+ "arguments": {
58
+ "query": "your search query",
59
+ "num_results": 10, // optional, default: 10
60
+ "site": "example.com", // optional, limit results to specific website
61
+ "language": "en", // optional, filter by language (ISO 639-1 code)
62
+ "dateRestrict": "m6", // optional, filter by date (e.g., "m6" for last 6 months)
63
+ "exactTerms": "exact phrase", // optional, search for exact phrase
64
+ "resultType": "news", // optional, specify type (news, images, videos)
65
+ "page": 2, // optional, page number for pagination (starts at 1)
66
+ "resultsPerPage": 10, // optional, default: 10, max: 10
67
+ "sort": "date" // optional, sort by "date" or "relevance" (default)
68
+ }
69
+ }
70
+ ```
71
+
72
+ Response includes:
73
+ - Search results with title, link, snippet in a readable format
74
+ - Pagination information (current page, total results, etc.)
75
+ - Categories of results (automatically detected)
76
+ - Navigation hints for pagination
77
+
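The `page` and `resultsPerPage` arguments correspond to the Custom Search JSON API's windowing model: the API returns at most 10 results per request, selected by a 1-indexed `start` parameter. A minimal sketch of that mapping (an assumption about the implementation, not code from the package):

```typescript
// Sketch: map page/resultsPerPage onto the Custom Search API's 1-indexed
// `start` parameter. The API caps results per request at 10.
function toStartIndex(page: number, resultsPerPage: number): number {
  if (page < 1) throw new Error("page starts at 1");
  const perPage = Math.min(resultsPerPage, 10); // API maximum per request
  return (page - 1) * perPage + 1;
}

console.log(toStartIndex(1, 10)); // 1  (first page)
console.log(toStartIndex(2, 10)); // 11 (second page)
```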
78
+ ## Getting Google API Credentials
79
+
80
+ 1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
81
+ 2. Create a new project or select an existing one
82
+ 3. Enable the Custom Search API
83
+ 4. Create API credentials (API Key)
84
+ 5. Go to the [Custom Search Engine](https://programmablesearchengine.google.com/about/) page
85
+ 6. Create a new search engine and get your Search Engine ID
86
+ 7. Add these credentials to your MCP settings file or set them as environment variables
87
+
88
+ ## Error Handling
89
+
90
+ The server provides detailed error messages for:
91
+ - Missing or invalid API credentials
92
+ - Failed search requests
93
+ - Invalid search parameters
94
+ - Network connectivity issues
95
+
96
+ ## Architecture
97
+
98
+ The server is built with TypeScript and uses the MCP SDK to provide a standardized interface for AI models to interact with Google Search. It consists of the **GoogleSearchService**, which handles Google API interactions for search functionality.
99
+
100
+ The server uses caching mechanisms to improve performance and reduce API calls.
101
+
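The compiled source in this diff hints at how that cache works: entries are evicted by deleting the oldest key once a size limit is reached. A hedged sketch of the idea (names and the size limit are assumptions, not the package's actual implementation):

```typescript
// Sketch of a bounded search cache that evicts its oldest entry.
// JavaScript's Map iterates keys in insertion order, so the first key
// returned by keys() is the oldest one inserted.
class BoundedCache<V> {
  private map = new Map<string, V>();
  constructor(private maxEntries = 100) {}

  get(key: string): V | undefined {
    return this.map.get(key);
  }

  set(key: string, value: V): void {
    if (this.map.size >= this.maxEntries) {
      const oldestKey = this.map.keys().next().value;
      if (oldestKey !== undefined) this.map.delete(oldestKey);
    }
    this.map.set(key, value);
  }
}
```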
102
+ ## License
103
+
104
+ MIT
@@ -3,11 +3,9 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3
3
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
4
  import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
5
5
  import { GoogleSearchService } from './services/google-search.service.js';
6
- import { ContentExtractor } from './services/content-extractor.service.js';
7
6
  class GoogleSearchServer {
8
7
  constructor() {
9
8
  this.searchService = new GoogleSearchService();
10
- this.contentExtractor = new ContentExtractor();
11
9
  this.server = new Server({
12
10
  name: 'google-search',
13
11
  version: '1.0.0'
@@ -25,7 +23,7 @@ class GoogleSearchServer {
25
23
  },
26
24
  num_results: {
27
25
  type: 'number',
28
- description: 'Number of results to return (default: 5, max: 10). Increase for broader coverage, decrease for faster response.'
26
+ description: 'Number of results to return (default: 10, max: 10). Increase for broader coverage, decrease for faster response.'
29
27
  },
30
28
  site: {
31
29
  type: 'string',
@@ -53,7 +51,7 @@ class GoogleSearchServer {
53
51
  },
54
52
  resultsPerPage: {
55
53
  type: 'number',
56
- description: 'Number of results to show per page (default: 5, max: 10). Controls how many results are returned for each page.'
54
+ description: 'Number of results to show per page (default: 10, max: 10). Controls how many results are returned for each page.'
57
55
  },
58
56
  sort: {
59
57
  type: 'string',
@@ -62,41 +60,6 @@ class GoogleSearchServer {
62
60
  },
63
61
  required: ['query']
64
62
  }
65
- },
66
- extract_webpage_content: {
67
- description: 'Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter. Use it to get detailed information from specific pages found via google_search. Works with most common webpage formats including articles, blogs, and documentation.',
68
- inputSchema: {
69
- type: 'object',
70
- properties: {
71
- url: {
72
- type: 'string',
73
- description: 'Full URL of the webpage to extract content from (must start with http:// or https://). Ensure the URL is from a public webpage and not behind authentication.'
74
- },
75
- format: {
76
- type: 'string',
77
- description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".'
78
- }
79
- },
80
- required: ['url']
81
- }
82
- },
83
- extract_multiple_webpages: {
84
- description: 'Extract and analyze content from multiple webpages in a single request. This tool is ideal for comparing information across different sources or gathering comprehensive information on a topic. Limited to 5 URLs per request to maintain performance.',
85
- inputSchema: {
86
- type: 'object',
87
- properties: {
88
- urls: {
89
- type: 'array',
90
- items: { type: 'string' },
91
- description: 'Array of webpage URLs to extract content from. Each URL must be public and start with http:// or https://. Maximum 5 URLs per request.'
92
- },
93
- format: {
94
- type: 'string',
95
- description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".'
96
- }
97
- },
98
- required: ['urls']
99
- }
100
63
  }
101
64
  }
102
65
  }
@@ -116,7 +79,7 @@ class GoogleSearchServer {
116
79
  },
117
80
  num_results: {
118
81
  type: 'number',
119
- description: 'Number of results to return (default: 5, max: 10). Increase for broader coverage, decrease for faster response.'
82
+ description: 'Number of results to return (default: 10, max: 10). Increase for broader coverage, decrease for faster response.'
120
83
  },
121
84
  site: {
122
85
  type: 'string',
@@ -144,7 +107,7 @@ class GoogleSearchServer {
144
107
  },
145
108
  resultsPerPage: {
146
109
  type: 'number',
147
- description: 'Number of results to show per page (default: 5, max: 10). Controls how many results are returned for each page.'
110
+ description: 'Number of results to show per page (default: 10, max: 10). Controls how many results are returned for each page.'
148
111
  },
149
112
  sort: {
150
113
  type: 'string',
@@ -153,43 +116,6 @@ class GoogleSearchServer {
153
116
  },
154
117
  required: ['query']
155
118
  }
156
- },
157
- {
158
- name: 'extract_webpage_content',
159
- description: 'Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter. Use it to get detailed information from specific pages found via google_search. Works with most common webpage formats including articles, blogs, and documentation.',
160
- inputSchema: {
161
- type: 'object',
162
- properties: {
163
- url: {
164
- type: 'string',
165
- description: 'Full URL of the webpage to extract content from (must start with http:// or https://). Ensure the URL is from a public webpage and not behind authentication.'
166
- },
167
- format: {
168
- type: 'string',
169
- description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".'
170
- }
171
- },
172
- required: ['url']
173
- }
174
- },
175
- {
176
- name: 'extract_multiple_webpages',
177
- description: 'Extract and analyze content from multiple webpages in a single request. This tool is ideal for comparing information across different sources or gathering comprehensive information on a topic. Limited to 5 URLs per request to maintain performance.',
178
- inputSchema: {
179
- type: 'object',
180
- properties: {
181
- urls: {
182
- type: 'array',
183
- items: { type: 'string' },
184
- description: 'Array of webpage URLs to extract content from. Each URL must be public and start with http:// or https://. Maximum 5 URLs per request.'
185
- },
186
- format: {
187
- type: 'string',
188
- description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".'
189
- }
190
- },
191
- required: ['urls']
192
- }
193
119
  }
194
120
  ]
195
121
  }));
@@ -214,22 +140,6 @@ class GoogleSearchServer {
214
140
  });
215
141
  }
216
142
  throw new Error('Invalid arguments for google_search tool');
217
- case 'extract_webpage_content':
218
- if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'url' in request.params.arguments) {
219
- return this.handleAnalyzeWebpage({
220
- url: String(request.params.arguments.url),
221
- format: request.params.arguments.format ? String(request.params.arguments.format) : 'markdown'
222
- });
223
- }
224
- throw new Error('Invalid arguments for extract_webpage_content tool');
225
- case 'extract_multiple_webpages':
226
- if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'urls' in request.params.arguments && Array.isArray(request.params.arguments.urls)) {
227
- return this.handleBatchAnalyzeWebpages({
228
- urls: request.params.arguments.urls.map(String),
229
- format: request.params.arguments.format ? String(request.params.arguments.format) : 'markdown'
230
- });
231
- }
232
- throw new Error('Invalid arguments for extract_multiple_webpages tool');
233
143
  default:
234
144
  throw new Error(`Unknown tool: ${request.params.name}`);
235
145
  }
@@ -298,102 +208,6 @@ class GoogleSearchServer {
298
208
  };
299
209
  }
300
210
  }
301
- async handleAnalyzeWebpage(args) {
302
- try {
303
- const content = await this.contentExtractor.extractContent(args.url, args.format);
304
- // Format the response in a more readable, concise way
305
- let responseText = `Content from: ${content.url}\n\n`;
306
- responseText += `Title: ${content.title}\n`;
307
- if (content.description) {
308
- responseText += `Description: ${content.description}\n`;
309
- }
310
- responseText += `\nStats: ${content.stats.word_count} words, ${content.stats.approximate_chars} characters\n\n`;
311
- // Add the summary if available
312
- if (content.summary) {
313
- responseText += `Summary: ${content.summary}\n\n`;
314
- }
315
- // Add a preview of the content
316
- responseText += `Content Preview:\n${content.content_preview.first_500_chars}\n\n`;
317
- // Add a note about requesting specific information
318
- responseText += `Note: This is a preview of the content. For specific information, please ask about particular aspects of this webpage.`;
319
- return {
320
- content: [
321
- {
322
- type: 'text',
323
- text: responseText,
324
- },
325
- ],
326
- };
327
- }
328
- catch (error) {
329
- const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
330
- const helpText = 'Common issues:\n- Check if the URL is accessible in a browser\n- Ensure the webpage is public\n- Try again if it\'s a temporary network issue';
331
- return {
332
- content: [
333
- {
334
- type: 'text',
335
- text: `${errorMessage}\n\n${helpText}`,
336
- },
337
- ],
338
- isError: true,
339
- };
340
- }
341
- }
342
- async handleBatchAnalyzeWebpages(args) {
343
- if (args.urls.length > 5) {
344
- return {
345
- content: [{
346
- type: 'text',
347
- text: 'Maximum 5 URLs allowed per request to maintain performance. Please reduce the number of URLs.'
348
- }],
349
- isError: true
350
- };
351
- }
352
- try {
353
- const results = await this.contentExtractor.batchExtractContent(args.urls, args.format);
354
- // Format the response in a more readable, concise way
355
- let responseText = `Content from ${args.urls.length} webpages:\n\n`;
356
- for (const [url, result] of Object.entries(results)) {
357
- responseText += `URL: ${url}\n`;
358
- if ('error' in result) {
359
- responseText += `Error: ${result.error}\n\n`;
360
- continue;
361
- }
362
- responseText += `Title: ${result.title}\n`;
363
- if (result.description) {
364
- responseText += `Description: ${result.description}\n`;
365
- }
366
- responseText += `Stats: ${result.stats.word_count} words\n`;
367
- // Add summary if available
368
- if (result.summary) {
369
- responseText += `Summary: ${result.summary}\n`;
370
- }
371
- responseText += `Preview: ${result.content_preview.first_500_chars.substring(0, 150)}...\n\n`;
372
- }
373
- responseText += `Note: These are previews of the content. To analyze the full content of a specific URL, use the extract_webpage_content tool with that URL.`;
374
- return {
375
- content: [
376
- {
377
- type: 'text',
378
- text: responseText,
379
- },
380
- ],
381
- };
382
- }
383
- catch (error) {
384
- const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
385
- const helpText = 'Common issues:\n- Check if all URLs are accessible in a browser\n- Ensure all webpages are public\n- Try again if it\'s a temporary network issue\n- Consider reducing the number of URLs';
386
- return {
387
- content: [
388
- {
389
- type: 'text',
390
- text: `${errorMessage}\n\n${helpText}`,
391
- },
392
- ],
393
- isError: true,
394
- };
395
- }
396
- }
397
211
  async start() {
398
212
  try {
399
213
  const transport = new StdioServerTransport();
@@ -52,7 +52,7 @@ export class GoogleSearchService {
52
52
  this.searchCache.delete(oldestKey);
53
53
  }
54
54
  }
55
- async search(query, numResults = 5, filters) {
55
+ async search(query, numResults = 10, filters) {
56
56
  try {
57
57
  // Generate cache key
58
58
  const cacheKey = this.generateCacheKey(query, numResults, filters);
@@ -1,23 +1,23 @@
1
- {
2
- "name": "google-search-mcp",
3
- "version": "0.1.0",
4
- "description": "MCP server for Google search and webpage analysis",
5
- "type": "module",
6
- "scripts": {
7
- "start": "node dist/google-search.js"
8
- },
9
- "dependencies": {
10
- "@modelcontextprotocol/sdk": "^1.0.1",
11
- "@mozilla/readability": "^0.6.0",
12
- "@types/turndown": "^5.0.5",
13
- "axios": "^1.7.9",
14
- "cheerio": "^1.0.0",
15
- "dompurify": "^3.2.3",
16
- "express": "^4.21.2",
17
- "googleapis": "^144.0.0",
18
- "jsdom": "^25.0.1",
19
- "markdown-it": "^14.1.0",
20
- "readability": "^0.1.0",
21
- "turndown": "^7.2.0"
22
- }
1
+ {
2
+ "name": "google-search-mcp",
3
+ "version": "0.1.1",
4
+ "description": "MCP server for Google search and webpage analysis",
5
+ "type": "module",
6
+ "scripts": {
7
+ "start": "node dist/google-search.js"
8
+ },
9
+ "dependencies": {
10
+ "@modelcontextprotocol/sdk": "^1.0.1",
11
+ "@mozilla/readability": "^0.6.0",
12
+ "@types/turndown": "^5.0.5",
13
+ "axios": "^1.7.9",
14
+ "cheerio": "^1.0.0",
15
+ "dompurify": "^3.2.3",
16
+ "express": "^4.21.2",
17
+ "googleapis": "^144.0.0",
18
+ "jsdom": "^25.0.1",
19
+ "markdown-it": "^14.1.0",
20
+ "readability": "^0.1.0",
21
+ "turndown": "^7.2.0"
22
+ }
23
23
  }