npm - pse-mcp - Versions diffs - 0.1.0 - Mend

pse-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/GEMINI.md +72 -0
package/License.md +3 -0
package/MCP Documents/README.md +1 -0
package/MCP Documents/mcp-client-guide.txt +736 -0
package/MCP Documents/mcp-complete-guide.txt +522 -0
package/MCP Documents/mcp-enhanced-instructions.md +297 -0
package/MCP Documents/mcp-server-guide.md +415 -0
package/MCP Documents/mcp-windows.txt +161 -0
package/QWEN.md +207 -0
package/README.md +220 -0
package/dist/content-fetcher.js +36 -0
package/dist/google-search.js +421 -0
package/dist/services/content-extractor.service.js +195 -0
package/dist/services/google-search.service.js +244 -0
package/dist/types.js +1 -0
package/dist-package/README.md +210 -0
package/dist-package/dist/content-fetcher.js +36 -0
package/dist-package/dist/google-search.js +420 -0
package/dist-package/dist/services/content-extractor.service.js +195 -0
package/dist-package/dist/services/google-search.service.js +244 -0
package/dist-package/dist/types.js +1 -0
package/dist-package/package-lock.json +3104 -0
package/dist-package/package.json +23 -0
package/license +4 -0
package/package.json +40 -0
package/src/google-search.ts +477 -0
package/src/mcp.d.ts +36 -0
package/src/services/content-extractor.service.ts +232 -0
package/src/services/google-search.service.ts +305 -0
package/src/types.ts +64 -0
package/tasks.md +141 -0
package/tsconfig.json +16 -0

package/dist/services/google-search.service.js ADDED Viewed

@@ -0,0 +1,244 @@
+import { google } from 'googleapis';
+import { URL } from 'url';
+export class GoogleSearchService {
+    constructor() {
+        // Cache for search results (key: query string + filters, value: results)
+        this.searchCache = new Map();
+        // Cache expiration time in milliseconds (5 minutes)
+        this.cacheTTL = 5 * 60 * 1000;
+        const apiKey = process.env.GOOGLE_API_KEY;
+        const searchEngineId = process.env.GOOGLE_SEARCH_ENGINE_ID;
+        if (!apiKey || !searchEngineId) {
+            throw new Error('Missing required environment variables: GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID');
+        }
+        // Initialize Google Custom Search API
+        this.customSearch = google.customsearch('v1').cse;
+        this.searchEngineId = searchEngineId;
+        // Set up the API client
+        google.options({
+            auth: apiKey
+        });
+    }
+    /**
+     * Generate a cache key from search parameters
+     */
+    generateCacheKey(query, numResults, filters) {
+        return JSON.stringify({
+            query,
+            numResults,
+            filters
+        });
+    }
+    /**
+     * Check if a cache entry is still valid
+     */
+    isCacheValid(entry) {
+        const now = Date.now();
+        return now - entry.timestamp < this.cacheTTL;
+    }
+    /**
+     * Store search results in cache
+     */
+    cacheSearchResults(cacheKey, results, pagination, categories) {
+        this.searchCache.set(cacheKey, {
+            timestamp: Date.now(),
+            data: { results, pagination, categories }
+        });
+        // Limit cache size to prevent memory issues (max 100 entries)
+        if (this.searchCache.size > 100) {
+            // Delete oldest entry
+            const oldestKey = Array.from(this.searchCache.entries())
+                .sort((a, b) => a[1].timestamp - b[1].timestamp)[0][0];
+            this.searchCache.delete(oldestKey);
+        }
+    }
+    async search(query, numResults = 5, filters) {
+        try {
+            // Generate cache key
+            const cacheKey = this.generateCacheKey(query, numResults, filters);
+            // Check cache first
+            const cachedResult = this.searchCache.get(cacheKey);
+            if (cachedResult && this.isCacheValid(cachedResult)) {
+                console.error('Using cached search results');
+                return cachedResult.data;
+            }
+            let formattedQuery = query;
+            // Apply site filter if provided
+            if (filters?.site) {
+                formattedQuery += ` site:${filters.site}`;
+            }
+            // Apply exact terms if provided
+            if (filters?.exactTerms) {
+                formattedQuery += ` "${filters.exactTerms}"`;
+            }
+            // Set default pagination values if not provided
+            const page = filters?.page && filters.page > 0 ? filters.page : 1;
+            const resultsPerPage = filters?.resultsPerPage ? Math.min(filters.resultsPerPage, 10) : Math.min(numResults, 10);
+            // Calculate start index for pagination (Google uses 1-based indexing)
+            const startIndex = (page - 1) * resultsPerPage + 1;
+            const params = {
+                cx: this.searchEngineId,
+                q: formattedQuery,
+                num: resultsPerPage,
+                start: startIndex
+            };
+            // Apply language filter if provided
+            if (filters?.language) {
+                params.lr = `lang_${filters.language}`;
+            }
+            // Apply date restriction if provided
+            if (filters?.dateRestrict) {
+                params.dateRestrict = filters.dateRestrict;
+            }
+            // Apply result type filter if provided
+            if (filters?.resultType) {
+                switch (filters.resultType.toLowerCase()) {
+                    case 'image':
+                    case 'images':
+                        params.searchType = 'image';
+                        break;
+                    case 'news':
+                        // For news, we need to modify the query
+                        formattedQuery += ' source:news';
+                        params.q = formattedQuery;
+                        break;
+                    case 'video':
+                    case 'videos':
+                        // For videos, we can use a more specific filter
+                        formattedQuery += ' filetype:video OR inurl:video OR inurl:watch';
+                        params.q = formattedQuery;
+                        break;
+                }
+            }
+            // Apply sorting if provided
+            if (filters?.sort) {
+                switch (filters.sort.toLowerCase()) {
+                    case 'date':
+                        // Sort by date (most recent first)
+                        params.sort = 'date';
+                        break;
+                    case 'relevance':
+                    default:
+                        // Google's default sort is by relevance, so we don't need to specify
+                        break;
+                }
+            }
+            const response = await this.customSearch.list(params);
+            // If no items are found, return empty results with pagination info
+            if (!response.data.items) {
+                return {
+                    results: [],
+                    pagination: {
+                        currentPage: page,
+                        resultsPerPage,
+                        totalResults: 0,
+                        totalPages: 0,
+                        hasNextPage: false,
+                        hasPreviousPage: page > 1
+                    },
+                    categories: []
+                };
+            }
+            // Map the search results and categorize them
+            const results = response.data.items.map(item => {
+                const result = {
+                    title: item.title || '',
+                    link: item.link || '',
+                    snippet: item.snippet || '',
+                    pagemap: item.pagemap || {},
+                    datePublished: item.pagemap?.metatags?.[0]?.['article:published_time'] || '',
+                    source: 'google_search'
+                };
+                // Add category to the result
+                result.category = this.categorizeResult(result);
+                return result;
+            });
+            // Generate category statistics
+            const categories = this.generateCategoryStats(results);
+            // Create pagination information
+            const totalResults = parseInt(response.data.searchInformation?.totalResults || '0', 10);
+            const totalPages = Math.ceil(totalResults / resultsPerPage);
+            const pagination = {
+                currentPage: page,
+                resultsPerPage,
+                totalResults,
+                totalPages,
+                hasNextPage: page < totalPages,
+                hasPreviousPage: page > 1
+            };
+            // Cache the results before returning
+            this.cacheSearchResults(cacheKey, results, pagination, categories);
+            return {
+                results,
+                pagination,
+                categories
+            };
+        }
+        catch (error) {
+            if (error instanceof Error) {
+                throw new Error(`Google Search API error: ${error.message}`);
+            }
+            throw new Error('Unknown error during Google search');
+        }
+    }
+    /**
+     * Categorizes a search result based on its content
+     * @param result The search result to categorize
+     * @returns The category name
+     */
+    categorizeResult(result) {
+        try {
+            // Extract the domain from the URL
+            const url = new URL(result.link);
+            const domain = url.hostname.replace(/^www\./, '');
+            // Check if this is a social media site
+            if (domain.match(/facebook\.com|twitter\.com|instagram\.com|linkedin\.com|pinterest\.com|tiktok\.com|reddit\.com/i)) {
+                return 'Social Media';
+            }
+            // Check if this is a video site
+            if (domain.match(/youtube\.com|vimeo\.com|dailymotion\.com|twitch\.tv/i)) {
+                return 'Video';
+            }
+            // Check if this is a news site
+            if (domain.match(/news|cnn\.com|bbc\.com|nytimes\.com|wsj\.com|reuters\.com|bloomberg\.com/i)) {
+                return 'News';
+            }
+            // Check if this is an educational site
+            if (domain.match(/\.edu$|wikipedia\.org|khan|course|learn|study|academic/i)) {
+                return 'Educational';
+            }
+            // Check if this is a documentation site
+            if (domain.match(/docs|documentation|developer|github\.com|gitlab\.com|bitbucket\.org|stackoverflow\.com/i) ||
+                result.title.match(/docs|documentation|api|reference|manual/i)) {
+                return 'Documentation';
+            }
+            // Check if this is a shopping site
+            if (domain.match(/amazon\.com|ebay\.com|etsy\.com|walmart\.com|shop|store|buy/i)) {
+                return 'Shopping';
+            }
+            // Default category based on domain
+            return domain.split('.').slice(-2, -1)[0].charAt(0).toUpperCase() + domain.split('.').slice(-2, -1)[0].slice(1);
+        }
+        catch (error) {
+            // If there's any error in categorization, return a default category
+            return 'Other';
+        }
+    }
+    /**
+     * Generates category statistics from search results
+     * @param results The search results to analyze
+     * @returns An array of category information
+     */
+    generateCategoryStats(results) {
+        // Count results by category
+        const categoryCounts = {};
+        results.forEach(result => {
+            const category = result.category || 'Other';
+            categoryCounts[category] = (categoryCounts[category] || 0) + 1;
+        });
+        // Convert to array of category info objects
+        return Object.entries(categoryCounts)
+            .map(([name, count]) => ({ name, count }))
+            .sort((a, b) => b.count - a.count); // Sort by count in descending order
+    }
+}

package/dist/types.js ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist-package/README.md ADDED Viewed

@@ -0,0 +1,210 @@
+# Google Search MCP Server
+An MCP (Model Context Protocol) server that provides Google search capabilities and webpage content analysis tools. This server enables AI models to perform Google searches and analyze webpage content programmatically.
+## Features
+- Google Custom Search integration
+- Advanced search features (filters, sorting, pagination, categorization)
+- Webpage content analysis in multiple formats (markdown, HTML, plain text)
+- Batch webpage analysis
+- Result categorization and classification
+- Content summarization
+- Optimized, human-readable responses
+- MCP-compliant interface
+## Prerequisites
+- Node.js (v16 or higher)
+- Google Cloud Platform account
+- Custom Search Engine ID
+- Google API Key
+## Installation
+1. Clone the repository
+2. Install Node.js dependencies:
+```bash
+npm install
+```
+3. Build the TypeScript code:
+```bash
+npm run build
+```
+## Configuration
+1. Set up environment variables for your Google API credentials:
+You can either set these as system environment variables or configure them in your MCP settings file.
+Required environment variables:
+- `GOOGLE_API_KEY`: Your Google API key
+- `GOOGLE_SEARCH_ENGINE_ID`: Your Custom Search Engine ID
+2. Add the server configuration to your MCP settings file (typically located at `%APPDATA%/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`):
+```json
+{
+  "mcpServers": {
+    "google-search": {
+      "autoApprove": [
+        "google_search",
+        "extract_webpage_content",
+        "extract_multiple_webpages"
+      ],
+      "disabled": false,
+      "timeout": 60,
+      "command": "node",
+      "args": [
+        "/path/to/google-search-mcp-server/dist/google-search.js"
+      ],
+      "env": {
+        "GOOGLE_API_KEY": "your-google-api-key",
+        "GOOGLE_SEARCH_ENGINE_ID": "your-custom-search-engine-id"
+      },
+      "transportType": "stdio"
+    }
+  }
+}
+```
+## Running
+Start the MCP server:
+```bash
+npm run start
+```
+## Available Tools
+### 1. google_search
+Search Google and return relevant results from the web. This tool finds web pages, articles, and information on specific topics using Google's search engine.
+```typescript
+{
+  "name": "google_search",
+  "arguments": {
+    "query": "your search query",
+    "num_results": 5, // optional, default: 5
+    "site": "example.com", // optional, limit results to specific website
+    "language": "en", // optional, filter by language (ISO 639-1 code)
+    "dateRestrict": "m6", // optional, filter by date (e.g., "m6" for last 6 months)
+    "exactTerms": "exact phrase", // optional, search for exact phrase
+    "resultType": "news", // optional, specify type (news, images, videos)
+    "page": 2, // optional, page number for pagination (starts at 1)
+    "resultsPerPage": 10, // optional, results per page (max: 10)
+    "sort": "date" // optional, sort by "date" or "relevance" (default)
+  }
+}
+```
+Response includes:
+- Search results with title, link, snippet in a readable format
+- Pagination information (current page, total results, etc.)
+- Categories of results (automatically detected)
+- Navigation hints for pagination
+### 2. extract_webpage_content
+Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter.
+```typescript
+{
+  "name": "extract_webpage_content",
+  "arguments": {
+    "url": "https://example.com",
+    "format": "markdown" // optional, format options: "markdown" (default), "html", or "text"
+  }
+}
+```
+Response includes:
+- Title and description of the webpage
+- Content statistics (word count, character count)
+- Content summary
+- Content preview (first 500 characters)
+### 3. extract_multiple_webpages
+Extract and analyze content from multiple webpages in a single request. Ideal for comparing information across different sources or gathering comprehensive information on a topic.
+```typescript
+{
+  "name": "extract_multiple_webpages",
+  "arguments": {
+    "urls": [
+      "https://example1.com",
+      "https://example2.com"
+    ],
+    "format": "html" // optional, format options: "markdown" (default), "html", or "text"
+  }
+}
+```
+Response includes:
+- Title and description of each webpage
+- Content statistics for each webpage
+- Content summary for each webpage
+- Content preview for each webpage (first 150 characters)
+## Getting Google API Credentials
+1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
+2. Create a new project or select an existing one
+3. Enable the Custom Search API
+4. Create API credentials (API Key)
+5. Go to the [Custom Search Engine](https://programmablesearchengine.google.com/about/) page
+6. Create a new search engine and get your Search Engine ID
+7. Add these credentials to your MCP settings file or set them as environment variables
+## Error Handling
+The server provides detailed error messages for:
+- Missing or invalid API credentials
+- Failed search requests
+- Invalid webpage URLs
+- Network connectivity issues
+## Architecture
+The server is built with TypeScript and uses the MCP SDK to provide a standardized interface for AI models to interact with Google Search and webpage content analysis tools. It consists of two main services:
+1. **GoogleSearchService**: Handles Google API interactions for search functionality
+2. **ContentExtractor**: Manages webpage content analysis and extraction
+The server uses caching mechanisms to improve performance and reduce API calls.
+## Distributing the Built Version
+If you prefer to distribute only the built version of this tool rather than the source code, you can use the included build script:
+```bash
+npm run build:dist
+```
+This script will:
+1. Build the TypeScript code
+2. Create a distribution package in the `dist-package` directory
+3. Copy the compiled JavaScript files and necessary package files
+4. Create a simplified package.json with only production dependencies
+The distribution package can then be shared with users who don't need access to the source code. Users of the distribution package will need to:
+1. Install production dependencies: `npm install --omit=dev` (on npm 6 and earlier, use `npm install --production`)
+2. Configure their Google API credentials as environment variables
+3. Add the server configuration to their MCP settings file
+4. Start the server: `npm start`
+This approach allows you to distribute the compiled JavaScript files without exposing the TypeScript source code.
+### Manual Distribution
+If you prefer to create the distribution package manually, you can use the `build-dist.js` script directly:
+```bash
+node build-dist.js
+```
+This script provides detailed output about each step of the build process and creates the same distribution package as the `npm run build:dist` command.
+## License
+MIT

package/dist-package/dist/content-fetcher.js ADDED Viewed

@@ -0,0 +1,36 @@
+import axios from 'axios';
+export class ContentFetcher {
+    constructor(port = 5001) {
+        this.baseUrl = `http://localhost:${port}`;
+    }
+    async fetchContent(url) {
+        try {
+            const response = await axios.post(`${this.baseUrl}/analyze`, { url });
+            return response.data;
+        }
+        catch (error) {
+            if (axios.isAxiosError(error)) {
+                throw new Error(`Failed to fetch content: ${error.response?.data?.error || error.message}`);
+            }
+            if (error instanceof Error) {
+                throw new Error(`Failed to fetch content: ${error.message}`);
+            }
+            throw new Error('Failed to fetch content: Unknown error');
+        }
+    }
+    async batchFetchContent(urls) {
+        try {
+            const response = await axios.post(`${this.baseUrl}/batch_analyze`, { urls });
+            return response.data;
+        }
+        catch (error) {
+            if (axios.isAxiosError(error)) {
+                throw new Error(`Failed to batch fetch content: ${error.response?.data?.error || error.message}`);
+            }
+            if (error instanceof Error) {
+                throw new Error(`Failed to batch fetch content: ${error.message}`);
+            }
+            throw new Error('Failed to batch fetch content: Unknown error');
+        }
+    }
+}