pse-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,244 @@
1
+ import { google } from 'googleapis';
2
+ import { URL } from 'url';
3
/** Maximum number of entries kept in the in-memory search cache. */
const MAX_CACHE_ENTRIES = 100;
/** Bounds applied to the `num` (results per page) request parameter. */
const MIN_RESULTS_PER_PAGE = 1;
const MAX_RESULTS_PER_PAGE = 10;
/** Page size used when neither `resultsPerPage` nor `numResults` is a number. */
const DEFAULT_RESULTS_PER_PAGE = 5;

/**
 * Ordered categorization rules; the first matching rule wins.
 * `domain` is tested against the hostname (without a leading "www.");
 * the optional `title` pattern is tested against the result title.
 * None of the patterns use the `g` flag, so `.test()` is stateless.
 */
const CATEGORY_RULES = [
    {
        name: 'Social Media',
        domain: /facebook\.com|twitter\.com|instagram\.com|linkedin\.com|pinterest\.com|tiktok\.com|reddit\.com/i
    },
    {
        name: 'Video',
        domain: /youtube\.com|vimeo\.com|dailymotion\.com|twitch\.tv/i
    },
    {
        name: 'News',
        domain: /news|cnn\.com|bbc\.com|nytimes\.com|wsj\.com|reuters\.com|bloomberg\.com/i
    },
    {
        name: 'Educational',
        domain: /\.edu$|wikipedia\.org|khan|course|learn|study|academic/i
    },
    {
        name: 'Documentation',
        domain: /docs|documentation|developer|github\.com|gitlab\.com|bitbucket\.org|stackoverflow\.com/i,
        title: /docs|documentation|api|reference|manual/i
    },
    {
        name: 'Shopping',
        domain: /amazon\.com|ebay\.com|etsy\.com|walmart\.com|shop|store|buy/i
    }
];

/**
 * Normalizes a requested page number to an integer >= 1.
 * Anything missing, non-numeric, non-finite or below 1 becomes page 1.
 */
function toPageNumber(value) {
    const page = Math.trunc(Number(value));
    return Number.isFinite(page) && page >= 1 ? page : 1;
}

/**
 * Picks the page size: `requested` when truthy, otherwise `numResults`,
 * truncated to an integer and clamped to MIN..MAX_RESULTS_PER_PAGE.
 */
function toResultsPerPage(requested, numResults) {
    const wanted = Math.trunc(Number(requested || numResults));
    if (Number.isNaN(wanted)) {
        return DEFAULT_RESULTS_PER_PAGE;
    }
    return Math.min(Math.max(wanted, MIN_RESULTS_PER_PAGE), MAX_RESULTS_PER_PAGE);
}

/**
 * Builds the `q` string: the raw query followed by the site filter, the
 * quoted exact-terms filter and the result-type suffix, in that order.
 */
function buildQuery(query, filters) {
    let formattedQuery = query;
    if (filters?.site) {
        formattedQuery += ` site:${filters.site}`;
    }
    if (filters?.exactTerms) {
        formattedQuery += ` "${filters.exactTerms}"`;
    }
    switch (filters?.resultType?.toLowerCase()) {
        case 'news':
            formattedQuery += ' source:news';
            break;
        case 'video':
        case 'videos':
            formattedQuery += ' filetype:video OR inurl:video OR inurl:watch';
            break;
        default:
            // 'image' is handled through the `searchType` parameter, not the query.
            break;
    }
    return formattedQuery;
}

/**
 * Thin wrapper around the Google Custom Search client that adds query
 * filters, pagination metadata, result categorization and a small
 * time-limited in-memory cache.
 */
export class GoogleSearchService {
    /**
     * Reads credentials from `GOOGLE_API_KEY` and `GOOGLE_SEARCH_ENGINE_ID`.
     * @throws {Error} When either environment variable is missing.
     */
    constructor() {
        // Cache for search results (key: query string + filters, value: results)
        this.searchCache = new Map();
        // Cache expiration time in milliseconds (5 minutes)
        this.cacheTTL = 5 * 60 * 1000;
        const apiKey = process.env.GOOGLE_API_KEY;
        const searchEngineId = process.env.GOOGLE_SEARCH_ENGINE_ID;
        if (!apiKey || !searchEngineId) {
            throw new Error('Missing required environment variables: GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID');
        }
        // Initialize Google Custom Search API
        this.customSearch = google.customsearch('v1').cse;
        this.searchEngineId = searchEngineId;
        // Set up the API client (the key is registered as a global googleapis option)
        google.options({
            auth: apiKey
        });
    }

    /**
     * Generate a cache key from search parameters.
     * @param {string} query Raw search query.
     * @param {number} numResults Requested number of results.
     * @param {object} [filters] Optional search filters.
     * @returns {string} JSON serialization of the three arguments.
     */
    generateCacheKey(query, numResults, filters) {
        return JSON.stringify({
            query,
            numResults,
            filters
        });
    }

    /**
     * Check if a cache entry is still valid.
     * @param {{timestamp: number}} entry Cache entry to inspect.
     * @returns {boolean} True while the entry is younger than `cacheTTL`.
     */
    isCacheValid(entry) {
        return Date.now() - entry.timestamp < this.cacheTTL;
    }

    /**
     * Store search results in cache, evicting the oldest entries once the
     * cache holds more than MAX_CACHE_ENTRIES.
     * @param {string} cacheKey Key produced by `generateCacheKey`.
     * @param {Array} results Mapped search results.
     * @param {object} pagination Pagination descriptor.
     * @param {Array} categories Category statistics.
     */
    cacheSearchResults(cacheKey, results, pagination, categories) {
        // A Map iterates in insertion order and `set` on an existing key keeps
        // its old position, so delete first: the first key is then always the
        // least recently stored one and no sort is needed to find it.
        this.searchCache.delete(cacheKey);
        this.searchCache.set(cacheKey, {
            timestamp: Date.now(),
            data: { results, pagination, categories }
        });
        while (this.searchCache.size > MAX_CACHE_ENTRIES) {
            const oldestKey = this.searchCache.keys().next().value;
            this.searchCache.delete(oldestKey);
        }
    }

    /**
     * Runs a search, serving it from the cache when a fresh entry exists.
     * @param {string} query Raw search query.
     * @param {number} [numResults=5] Page size used when `filters.resultsPerPage` is not set.
     * @param {object} [filters] Optional filters: `site`, `exactTerms`, `language`,
     *   `dateRestrict`, `resultType`, `page`, `resultsPerPage`, `sort`.
     * @returns {Promise<{results: Array, pagination: object, categories: Array}>}
     * @throws {Error} "Google Search API error: ..." wrapping any failure; the
     *   original error is available as `cause`.
     */
    async search(query, numResults = 5, filters) {
        try {
            const cacheKey = this.generateCacheKey(query, numResults, filters);
            const cachedResult = this.searchCache.get(cacheKey);
            if (cachedResult) {
                if (this.isCacheValid(cachedResult)) {
                    console.error('Using cached search results');
                    return cachedResult.data;
                }
                // Drop the stale entry so it does not occupy a cache slot.
                this.searchCache.delete(cacheKey);
            }

            // Sanitize pagination so the request never carries a zero, negative
            // or fractional `num`/`start`, and `totalPages` never divides by zero.
            const page = toPageNumber(filters?.page);
            const resultsPerPage = toResultsPerPage(filters?.resultsPerPage, numResults);
            // Calculate start index for pagination (Google uses 1-based indexing)
            const startIndex = (page - 1) * resultsPerPage + 1;

            const params = {
                cx: this.searchEngineId,
                q: buildQuery(query, filters),
                num: resultsPerPage,
                start: startIndex
            };
            if (filters?.language) {
                params.lr = `lang_${filters.language}`;
            }
            if (filters?.dateRestrict) {
                params.dateRestrict = filters.dateRestrict;
            }
            const resultType = filters?.resultType?.toLowerCase();
            if (resultType === 'image' || resultType === 'images') {
                params.searchType = 'image';
            }
            // Relevance is the API default, so only date sorting is sent.
            if (filters?.sort?.toLowerCase() === 'date') {
                params.sort = 'date';
            }

            const response = await this.customSearch.list(params);

            // If no items are found, return (and cache) empty results with pagination info
            if (!response.data.items) {
                const emptyPagination = {
                    currentPage: page,
                    resultsPerPage,
                    totalResults: 0,
                    totalPages: 0,
                    hasNextPage: false,
                    hasPreviousPage: page > 1
                };
                this.cacheSearchResults(cacheKey, [], emptyPagination, []);
                return {
                    results: [],
                    pagination: emptyPagination,
                    categories: []
                };
            }

            // Map the search results and categorize them
            const results = response.data.items.map((item) => {
                const result = {
                    title: item.title || '',
                    link: item.link || '',
                    snippet: item.snippet || '',
                    pagemap: item.pagemap || {},
                    datePublished: item.pagemap?.metatags?.[0]?.['article:published_time'] || '',
                    source: 'google_search'
                };
                result.category = this.categorizeResult(result);
                return result;
            });
            const categories = this.generateCategoryStats(results);

            // Fall back to 0 when the reported total is missing or unparseable.
            const totalResults = Number.parseInt(response.data.searchInformation?.totalResults || '0', 10) || 0;
            const totalPages = Math.ceil(totalResults / resultsPerPage);
            const pagination = {
                currentPage: page,
                resultsPerPage,
                totalResults,
                totalPages,
                hasNextPage: page < totalPages,
                hasPreviousPage: page > 1
            };

            this.cacheSearchResults(cacheKey, results, pagination, categories);
            return {
                results,
                pagination,
                categories
            };
        }
        catch (error) {
            if (error instanceof Error) {
                throw new Error(`Google Search API error: ${error.message}`, { cause: error });
            }
            throw new Error('Unknown error during Google search', { cause: error });
        }
    }

    /**
     * Categorizes a search result based on its hostname and title.
     * @param result The search result to categorize (`link`, optional `title`)
     * @returns The category name: a CATEGORY_RULES name, the capitalized
     *   second-to-last hostname label, or 'Other' when the link cannot be
     *   parsed or the hostname has no such label.
     */
    categorizeResult(result) {
        let domain;
        try {
            domain = new URL(result.link).hostname.replace(/^www\./, '');
        }
        catch {
            // Missing or malformed link: nothing to categorize on.
            return 'Other';
        }
        // A missing title must not prevent the domain rules from being applied.
        const title = typeof result.title === 'string' ? result.title : '';
        for (const rule of CATEGORY_RULES) {
            if (rule.domain.test(domain) || (rule.title && rule.title.test(title))) {
                return rule.name;
            }
        }
        // Default category based on domain, e.g. "example.com" -> "Example".
        const labels = domain.split('.');
        const label = labels.length >= 2 ? labels[labels.length - 2] : '';
        if (!label) {
            return 'Other';
        }
        return label.charAt(0).toUpperCase() + label.slice(1);
    }

    /**
     * Generates category statistics from search results.
     * @param results The search results to analyze
     * @returns An array of `{ name, count }` objects sorted by count, descending;
     *   results without a category are counted as 'Other'
     */
    generateCategoryStats(results) {
        // A Map keeps arbitrary, domain-derived category names away from
        // Object.prototype keys.
        const categoryCounts = new Map();
        for (const result of results) {
            const category = result.category || 'Other';
            categoryCounts.set(category, (categoryCounts.get(category) ?? 0) + 1);
        }
        return Array.from(categoryCounts, ([name, count]) => ({ name, count }))
            .sort((a, b) => b.count - a.count);
    }
}
package/dist/types.js ADDED
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,210 @@
1
+ # Google Search MCP Server
2
+
3
+ An MCP (Model Context Protocol) server that provides Google search capabilities and webpage content analysis tools. This server enables AI models to perform Google searches and analyze webpage content programmatically.
4
+
5
+ ## Features
6
+
7
+ - Google Custom Search integration
8
+ - Advanced search features (filters, sorting, pagination, categorization)
9
+ - Webpage content analysis in multiple formats (markdown, HTML, plain text)
10
+ - Batch webpage analysis
11
+ - Result categorization and classification
12
+ - Content summarization
13
+ - Optimized, human-readable responses
14
+ - MCP-compliant interface
15
+
16
+ ## Prerequisites
17
+
18
+ - Node.js (v16 or higher)
19
+ - Google Cloud Platform account
20
+ - Custom Search Engine ID
21
+ - Google API Key
22
+
23
+ ## Installation
24
+
25
+ 1. Clone the repository
26
+ 2. Install Node.js dependencies:
27
+ ```bash
28
+ npm install
29
+ ```
30
+ 3. Build the TypeScript code:
31
+ ```bash
32
+ npm run build
33
+ ```
34
+
35
+ ## Configuration
36
+
37
+ 1. Set up environment variables for your Google API credentials:
38
+
39
+ You can either set these as system environment variables or configure them in your MCP settings file.
40
+
41
+ Required environment variables:
42
+ - `GOOGLE_API_KEY`: Your Google API key
43
+ - `GOOGLE_SEARCH_ENGINE_ID`: Your Custom Search Engine ID
44
+
45
+ 2. Add the server configuration to your MCP settings file (on Windows, typically located at `%APPDATA%/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`):
46
+ ```json
47
+ {
48
+ "mcpServers": {
49
+ "google-search": {
50
+ "autoApprove": [
51
+ "google_search",
52
+ "extract_webpage_content",
53
+ "extract_multiple_webpages"
54
+ ],
55
+ "disabled": false,
56
+ "timeout": 60,
57
+ "command": "node",
58
+ "args": [
59
+ "/path/to/google-search-mcp-server/dist/google-search.js"
60
+ ],
61
+ "env": {
62
+ "GOOGLE_API_KEY": "your-google-api-key",
63
+ "GOOGLE_SEARCH_ENGINE_ID": "your-custom-search-engine-id"
64
+ },
65
+ "transportType": "stdio"
66
+ }
67
+ }
68
+ }
69
+ ```
70
+
71
+ ## Running
72
+
73
+ Start the MCP server:
74
+ ```bash
75
+ npm run start
76
+ ```
77
+
78
+ ## Available Tools
79
+
80
+ ### 1. google_search
81
+ Search Google and return relevant results from the web. This tool finds web pages, articles, and information on specific topics using Google's search engine.
82
+
83
+ ```typescript
84
+ {
85
+ "name": "google_search",
86
+ "arguments": {
87
+ "query": "your search query",
88
+ "num_results": 5, // optional, default: 5
89
+ "site": "example.com", // optional, limit results to specific website
90
+ "language": "en", // optional, filter by language (ISO 639-1 code)
91
+ "dateRestrict": "m6", // optional, filter by date (e.g., "m6" for last 6 months)
92
+ "exactTerms": "exact phrase", // optional, search for exact phrase
93
+ "resultType": "news", // optional, specify type (news, images, videos)
94
+ "page": 2, // optional, page number for pagination (starts at 1)
95
+ "resultsPerPage": 10, // optional, results per page (max: 10)
96
+ "sort": "date" // optional, sort by "date" or "relevance" (default)
97
+ }
98
+ }
99
+ ```
100
+
101
+ Response includes:
102
+ - Search results with title, link, snippet in a readable format
103
+ - Pagination information (current page, total results, etc.)
104
+ - Categories of results (automatically detected)
105
+ - Navigation hints for pagination
106
+
107
+ ### 2. extract_webpage_content
108
+ Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter.
109
+
110
+ ```typescript
111
+ {
112
+ "name": "extract_webpage_content",
113
+ "arguments": {
114
+ "url": "https://example.com",
115
+ "format": "markdown" // optional, format options: "markdown" (default), "html", or "text"
116
+ }
117
+ }
118
+ ```
119
+
120
+ Response includes:
121
+ - Title and description of the webpage
122
+ - Content statistics (word count, character count)
123
+ - Content summary
124
+ - Content preview (first 500 characters)
125
+
126
+ ### 3. extract_multiple_webpages
127
+ Extract and analyze content from multiple webpages in a single request. Ideal for comparing information across different sources or gathering comprehensive information on a topic.
128
+
129
+ ```typescript
130
+ {
131
+ "name": "extract_multiple_webpages",
132
+ "arguments": {
133
+ "urls": [
134
+ "https://example1.com",
135
+ "https://example2.com"
136
+ ],
137
+ "format": "html" // optional, format options: "markdown" (default), "html", or "text"
138
+ }
139
+ }
140
+ ```
141
+
142
+ Response includes:
143
+ - Title and description of each webpage
144
+ - Content statistics for each webpage
145
+ - Content summary for each webpage
146
+ - Content preview for each webpage (first 150 characters)
147
+
148
+ ## Getting Google API Credentials
149
+
150
+ 1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
151
+ 2. Create a new project or select an existing one
152
+ 3. Enable the Custom Search API
153
+ 4. Create API credentials (API Key)
154
+ 5. Go to the [Custom Search Engine](https://programmablesearchengine.google.com/about/) page
155
+ 6. Create a new search engine and get your Search Engine ID
156
+ 7. Add these credentials to your MCP settings file or set them as environment variables
157
+
158
+ ## Error Handling
159
+
160
+ The server provides detailed error messages for:
161
+ - Missing or invalid API credentials
162
+ - Failed search requests
163
+ - Invalid webpage URLs
164
+ - Network connectivity issues
165
+
166
+ ## Architecture
167
+
168
+ The server is built with TypeScript and uses the MCP SDK to provide a standardized interface for AI models to interact with Google Search and webpage content analysis tools. It consists of two main services:
169
+
170
+ 1. **GoogleSearchService**: Handles Google API interactions for search functionality
171
+ 2. **ContentExtractor**: Manages webpage content analysis and extraction
172
+
173
+ The server uses caching mechanisms to improve performance and reduce API calls.
174
+
175
+ ## Distributing the Built Version
176
+
177
+ If you prefer to distribute only the built version of this tool rather than the source code, you can use the included build script:
178
+
179
+ ```bash
180
+ npm run build:dist
181
+ ```
182
+
183
+ This script will:
184
+ 1. Build the TypeScript code
185
+ 2. Create a distribution package in the `dist-package` directory
186
+ 3. Copy the compiled JavaScript files and necessary package files
187
+ 4. Create a simplified package.json with only production dependencies
188
+
189
+ The distribution package can then be shared with users who don't need access to the source code. Users of the distribution package will need to:
190
+
191
+ 1. Install production dependencies: `npm install --production`
192
+ 2. Configure their Google API credentials as environment variables
193
+ 3. Add the server configuration to their MCP settings file
194
+ 4. Start the server: `npm start`
195
+
196
+ This approach allows you to distribute the compiled JavaScript files without exposing the TypeScript source code.
197
+
198
+ ### Manual Distribution
199
+
200
+ If you prefer to create the distribution package manually, you can use the `build-dist.js` script directly:
201
+
202
+ ```bash
203
+ node build-dist.js
204
+ ```
205
+
206
+ This script provides detailed output about each step of the build process and creates the same distribution package as the `npm run build:dist` command.
207
+
208
+ ## License
209
+
210
+ MIT
@@ -0,0 +1,36 @@
1
+ import axios from 'axios';
2
/**
 * POSTs `payload` to `url` and returns the response body.
 * Every failure is rethrown as an Error whose message starts with
 * `failurePrefix`; the original error is attached as `cause`.
 */
async function postOrThrow(url, payload, failurePrefix) {
    try {
        const response = await axios.post(url, payload);
        return response.data;
    }
    catch (error) {
        if (axios.isAxiosError(error)) {
            // Prefer the error text sent back in the response body, when there is one.
            const detail = error.response?.data?.error || error.message;
            throw new Error(`${failurePrefix}: ${detail}`, { cause: error });
        }
        if (error instanceof Error) {
            throw new Error(`${failurePrefix}: ${error.message}`, { cause: error });
        }
        throw new Error(`${failurePrefix}: Unknown error`, { cause: error });
    }
}

/**
 * HTTP client for a content-analysis service listening on localhost.
 */
export class ContentFetcher {
    /**
     * @param {number} [port=5001] Port of the local analysis service.
     */
    constructor(port = 5001) {
        this.baseUrl = `http://localhost:${port}`;
    }

    /**
     * Sends a single URL to the `/analyze` endpoint.
     * @param {string} url Page to analyze.
     * @returns {Promise<*>} The response body returned by the service.
     * @throws {Error} "Failed to fetch content: ..." on any failure.
     */
    async fetchContent(url) {
        return postOrThrow(`${this.baseUrl}/analyze`, { url }, 'Failed to fetch content');
    }

    /**
     * Sends several URLs to the `/batch_analyze` endpoint in one request.
     * @param {string[]} urls Pages to analyze.
     * @returns {Promise<*>} The response body returned by the service.
     * @throws {Error} "Failed to batch fetch content: ..." on any failure.
     */
    async batchFetchContent(urls) {
        return postOrThrow(`${this.baseUrl}/batch_analyze`, { urls }, 'Failed to batch fetch content');
    }
}