pse-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,244 @@
1
+ import { google } from 'googleapis';
2
+ import { URL } from 'url';
3
/**
 * Client for the Google Custom Search JSON API with a small in-memory
 * result cache, pagination metadata and heuristic result categorization.
 *
 * Requires the GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment
 * variables to be set when an instance is constructed.
 */
export class GoogleSearchService {
    /**
     * @throws {Error} If GOOGLE_API_KEY or GOOGLE_SEARCH_ENGINE_ID is missing.
     */
    constructor() {
        // Cache for search results (key: query string + filters, value: results)
        this.searchCache = new Map();
        // Cache expiration time in milliseconds (5 minutes)
        this.cacheTTL = 5 * 60 * 1000;
        const apiKey = process.env.GOOGLE_API_KEY;
        const searchEngineId = process.env.GOOGLE_SEARCH_ENGINE_ID;
        if (!apiKey || !searchEngineId) {
            throw new Error('Missing required environment variables: GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID');
        }
        // Initialize Google Custom Search API
        this.customSearch = google.customsearch('v1').cse;
        this.searchEngineId = searchEngineId;
        // NOTE: this configures the API key on the shared `google` object, not
        // just on this instance.
        google.options({
            auth: apiKey
        });
    }

    /**
     * Generate a cache key from search parameters.
     *
     * @param {string} query The raw search query.
     * @param {number} numResults The requested number of results.
     * @param {object} [filters] Optional search filters.
     * @returns {string} JSON serialization of the three arguments.
     */
    generateCacheKey(query, numResults, filters) {
        return JSON.stringify({ query, numResults, filters });
    }

    /**
     * Check if a cache entry is still valid.
     *
     * @param {{timestamp: number}} entry A cache entry created by cacheSearchResults.
     * @returns {boolean} True while the entry is younger than `cacheTTL` milliseconds.
     */
    isCacheValid(entry) {
        return Date.now() - entry.timestamp < this.cacheTTL;
    }

    /**
     * Store search results in the cache, evicting the oldest entry once the
     * cache holds more than 100 entries.
     *
     * @param {string} cacheKey Key produced by generateCacheKey.
     * @param {Array<object>} results Mapped search results.
     * @param {object} pagination Pagination information for the results.
     * @param {Array<{name: string, count: number}>} categories Category statistics.
     */
    cacheSearchResults(cacheKey, results, pagination, categories) {
        const maxEntries = 100;
        // A Map iterates in insertion order, but set() on an existing key keeps
        // the key's original position. Deleting first moves a refreshed entry to
        // the end, so the first key is always the least recently stored one.
        this.searchCache.delete(cacheKey);
        this.searchCache.set(cacheKey, {
            timestamp: Date.now(),
            data: { results, pagination, categories }
        });
        // Limit cache size to prevent memory issues
        if (this.searchCache.size > maxEntries) {
            const oldestKey = this.searchCache.keys().next().value;
            this.searchCache.delete(oldestKey);
        }
    }

    /**
     * Run a search, serving it from the cache when a fresh entry exists.
     *
     * @param {string} query The search query.
     * @param {number} [numResults=5] Page size used when `filters.resultsPerPage`
     *   is not set. The effective page size is clamped to an integer from 1 to 10.
     * @param {object} [filters] Optional filters: `site`, `exactTerms`, `page`,
     *   `resultsPerPage`, `language`, `dateRestrict`, `resultType`
     *   ('image'/'images', 'news', 'video'/'videos') and `sort` ('date' or 'relevance').
     * @returns {Promise<{results: Array<object>, pagination: object, categories: Array<object>}>}
     * @throws {Error} Any failure is rethrown as an Error whose message starts
     *   with 'Google Search API error: '; the original error is attached as `cause`.
     */
    async search(query, numResults = 5, filters) {
        try {
            // Check cache first
            const cacheKey = this.generateCacheKey(query, numResults, filters);
            const cachedResult = this.searchCache.get(cacheKey);
            if (cachedResult && this.isCacheValid(cachedResult)) {
                console.error('Using cached search results');
                return cachedResult.data;
            }

            // Normalize pagination input: the page must be an integer >= 1.
            const requestedPage = Math.floor(Number(filters?.page));
            const page = Number.isFinite(requestedPage) && requestedPage >= 1 ? requestedPage : 1;
            // The page size is sent as `num`, which must be an integer between
            // 1 and 10, so clamp instead of sending a value the API rejects.
            const requestedSize = Math.floor(Number(filters?.resultsPerPage || numResults));
            const resultsPerPage = Number.isNaN(requestedSize)
                ? 5
                : Math.min(Math.max(requestedSize, 1), 10);
            // Calculate start index for pagination (Google uses 1-based indexing)
            const startIndex = (page - 1) * resultsPerPage + 1;

            // Build the query string: site and exact-term operators first, then
            // any result-type specific suffix.
            let formattedQuery = query;
            if (filters?.site) {
                formattedQuery += ` site:${filters.site}`;
            }
            if (filters?.exactTerms) {
                formattedQuery += ` "${filters.exactTerms}"`;
            }

            const params = {
                cx: this.searchEngineId,
                q: formattedQuery,
                num: resultsPerPage,
                start: startIndex
            };
            // Apply language filter if provided
            if (filters?.language) {
                params.lr = `lang_${filters.language}`;
            }
            // Apply date restriction if provided
            if (filters?.dateRestrict) {
                params.dateRestrict = filters.dateRestrict;
            }
            // Apply result type filter if provided
            if (filters?.resultType) {
                switch (filters.resultType.toLowerCase()) {
                    case 'image':
                    case 'images':
                        params.searchType = 'image';
                        break;
                    case 'news':
                        // For news, we need to modify the query
                        params.q = `${formattedQuery} source:news`;
                        break;
                    case 'video':
                    case 'videos':
                        // For videos, narrow the query with extra operators
                        params.q = `${formattedQuery} filetype:video OR inurl:video OR inurl:watch`;
                        break;
                }
            }
            // Only 'date' needs an explicit parameter; any other value leaves
            // the request unsorted (relevance order).
            if (filters?.sort && filters.sort.toLowerCase() === 'date') {
                params.sort = 'date';
            }

            const response = await this.customSearch.list(params);

            // If no items are found, return empty results with pagination info
            if (!response.data.items) {
                return {
                    results: [],
                    pagination: {
                        currentPage: page,
                        resultsPerPage,
                        totalResults: 0,
                        totalPages: 0,
                        hasNextPage: false,
                        hasPreviousPage: page > 1
                    },
                    categories: []
                };
            }

            // Map the search results and categorize them
            const results = response.data.items.map((item) => {
                const result = {
                    title: item.title || '',
                    link: item.link || '',
                    snippet: item.snippet || '',
                    pagemap: item.pagemap || {},
                    datePublished: item.pagemap?.metatags?.[0]?.['article:published_time'] || '',
                    source: 'google_search'
                };
                result.category = this.categorizeResult(result);
                return result;
            });
            const categories = this.generateCategoryStats(results);

            // Create pagination information
            const totalResults = Number.parseInt(response.data.searchInformation?.totalResults || '0', 10);
            const totalPages = Math.ceil(totalResults / resultsPerPage);
            const pagination = {
                currentPage: page,
                resultsPerPage,
                totalResults,
                totalPages,
                hasNextPage: page < totalPages,
                hasPreviousPage: page > 1
            };

            // Cache the results before returning
            this.cacheSearchResults(cacheKey, results, pagination, categories);
            return { results, pagination, categories };
        }
        catch (error) {
            // Keep the original error reachable through `cause` so the stack
            // and any API error details are not lost by the wrapping.
            if (error instanceof Error) {
                throw new Error(`Google Search API error: ${error.message}`, { cause: error });
            }
            throw new Error('Unknown error during Google search', { cause: error });
        }
    }

    /**
     * Categorizes a search result based on its host name and title.
     *
     * Rules are checked in order: Social Media, Video, News, Educational,
     * Documentation, Shopping. If none match, the category is the capitalized
     * second-to-last label of the host name (e.g. 'Example' for example.com).
     *
     * @param {{link: string, title?: string}} result The search result to categorize.
     * @returns {string} The category name, or 'Other' when the link cannot be
     *   parsed as a URL or its host name has fewer than two labels.
     */
    categorizeResult(result) {
        let hostname;
        try {
            hostname = new URL(result.link).hostname;
        }
        catch {
            // Unparseable or missing link: fall back to the default category
            return 'Other';
        }
        const domain = hostname.replace(/^www\./, '');
        // A missing title must not abort categorization of the remaining rules.
        const title = typeof result.title === 'string' ? result.title : '';

        if (/facebook\.com|twitter\.com|instagram\.com|linkedin\.com|pinterest\.com|tiktok\.com|reddit\.com/i.test(domain)) {
            return 'Social Media';
        }
        if (/youtube\.com|vimeo\.com|dailymotion\.com|twitch\.tv/i.test(domain)) {
            return 'Video';
        }
        if (/news|cnn\.com|bbc\.com|nytimes\.com|wsj\.com|reuters\.com|bloomberg\.com/i.test(domain)) {
            return 'News';
        }
        if (/\.edu$|wikipedia\.org|khan|course|learn|study|academic/i.test(domain)) {
            return 'Educational';
        }
        // Title keywords are matched as whole words so that titles such as
        // "Capital One" (contains "api") are not classified as documentation.
        if (/docs|documentation|developer|github\.com|gitlab\.com|bitbucket\.org|stackoverflow\.com/i.test(domain) ||
            /\b(?:docs|documentation|apis?|references?|manuals?)\b/i.test(title)) {
            return 'Documentation';
        }
        if (/amazon\.com|ebay\.com|etsy\.com|walmart\.com|shop|store|buy/i.test(domain)) {
            return 'Shopping';
        }

        // Default category based on the label just before the last one
        const labels = domain.split('.');
        const label = labels.length >= 2 ? labels[labels.length - 2] : '';
        if (!label) {
            return 'Other';
        }
        return label.charAt(0).toUpperCase() + label.slice(1);
    }

    /**
     * Generates category statistics from search results.
     *
     * @param {Array<{category?: string}>} results The search results to analyze.
     * @returns {Array<{name: string, count: number}>} One entry per category,
     *   sorted by count in descending order; results without a category are
     *   counted as 'Other'.
     */
    generateCategoryStats(results) {
        // A Map is used instead of a plain object so that category names such
        // as 'constructor' or '__proto__' cannot collide with inherited keys.
        const categoryCounts = new Map();
        for (const result of results) {
            const category = result.category || 'Other';
            categoryCounts.set(category, (categoryCounts.get(category) ?? 0) + 1);
        }
        return Array.from(categoryCounts, ([name, count]) => ({ name, count }))
            .sort((a, b) => b.count - a.count);
    }
}
@@ -0,0 +1 @@
1
+ export {};