pse-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GEMINI.md +72 -0
- package/License.md +3 -0
- package/MCP Documents/README.md +1 -0
- package/MCP Documents/mcp-client-guide.txt +736 -0
- package/MCP Documents/mcp-complete-guide.txt +522 -0
- package/MCP Documents/mcp-enhanced-instructions.md +297 -0
- package/MCP Documents/mcp-server-guide.md +415 -0
- package/MCP Documents/mcp-windows.txt +161 -0
- package/QWEN.md +207 -0
- package/README.md +220 -0
- package/dist/content-fetcher.js +36 -0
- package/dist/google-search.js +421 -0
- package/dist/services/content-extractor.service.js +195 -0
- package/dist/services/google-search.service.js +244 -0
- package/dist/types.js +1 -0
- package/dist-package/README.md +210 -0
- package/dist-package/dist/content-fetcher.js +36 -0
- package/dist-package/dist/google-search.js +420 -0
- package/dist-package/dist/services/content-extractor.service.js +195 -0
- package/dist-package/dist/services/google-search.service.js +244 -0
- package/dist-package/dist/types.js +1 -0
- package/dist-package/package-lock.json +3104 -0
- package/dist-package/package.json +23 -0
- package/license +4 -0
- package/package.json +40 -0
- package/src/google-search.ts +477 -0
- package/src/mcp.d.ts +36 -0
- package/src/services/content-extractor.service.ts +232 -0
- package/src/services/google-search.service.ts +305 -0
- package/src/types.ts +64 -0
- package/tasks.md +141 -0
- package/tsconfig.json +16 -0
@@ -0,0 +1,244 @@
|
|
1
|
+
import { google } from 'googleapis';
|
2
|
+
import { URL } from 'url';
|
3
|
+
/**
 * Thin wrapper around the Google Custom Search `cse.list` endpoint that adds
 * query filters, pagination metadata, result categorisation and a small
 * in-memory cache with a time-to-live.
 */
export class GoogleSearchService {
    /**
     * Reads the API credentials from the environment and prepares the
     * Custom Search client and the result cache.
     *
     * Note: the API key is registered through `google.options`, i.e. on the
     * shared `google` object imported from `googleapis`, not on this instance.
     *
     * @throws {Error} If GOOGLE_API_KEY or GOOGLE_SEARCH_ENGINE_ID is unset or empty.
     */
    constructor() {
        // Cache for search results (key: query string + filters, value: results)
        this.searchCache = new Map();
        // Cache expiration time in milliseconds (5 minutes)
        this.cacheTTL = 5 * 60 * 1000;
        const apiKey = process.env.GOOGLE_API_KEY;
        const searchEngineId = process.env.GOOGLE_SEARCH_ENGINE_ID;
        if (!apiKey || !searchEngineId) {
            throw new Error('Missing required environment variables: GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID');
        }
        // Initialize Google Custom Search API
        this.customSearch = google.customsearch('v1').cse;
        this.searchEngineId = searchEngineId;
        // Set up the API client
        google.options({
            auth: apiKey
        });
    }

    /**
     * Generate a cache key from search parameters.
     *
     * @param query The raw query string.
     * @param numResults The requested number of results.
     * @param filters Optional filter object; serialised as-is.
     * @returns A JSON string identifying this exact request.
     */
    generateCacheKey(query, numResults, filters) {
        return JSON.stringify({
            query,
            numResults,
            filters
        });
    }

    /**
     * Check if a cache entry is still valid.
     *
     * @param entry A cache entry carrying a `timestamp` in epoch milliseconds.
     * @returns true while the entry is younger than `this.cacheTTL`.
     */
    isCacheValid(entry) {
        return Date.now() - entry.timestamp < this.cacheTTL;
    }

    /**
     * Store search results in cache and keep the cache bounded.
     *
     * The Map is kept in "least recently written first" order so eviction can
     * simply walk from the front instead of sorting every entry.
     *
     * @param cacheKey Key produced by generateCacheKey.
     * @param results Formatted search results.
     * @param pagination Pagination metadata for the results.
     * @param categories Category statistics for the results.
     */
    cacheSearchResults(cacheKey, results, pagination, categories) {
        // Limit cache size to prevent memory issues (max 100 entries)
        const maxEntries = 100;
        // Map#set keeps the original position of an existing key; delete first
        // so a refreshed entry moves to the back of the iteration order.
        this.searchCache.delete(cacheKey);
        this.searchCache.set(cacheKey, {
            timestamp: Date.now(),
            data: { results, pagination, categories }
        });
        // Oldest entries are at the front: drop them while the cache is over
        // capacity or while they have expired (expired entries are never served).
        for (const [key, entry] of this.searchCache) {
            if (this.searchCache.size <= maxEntries && this.isCacheValid(entry)) {
                break;
            }
            this.searchCache.delete(key);
        }
    }

    /**
     * Translate the caller's query and filters into `cse.list` parameters.
     *
     * @param query The raw query string.
     * @param numResults Fallback page size when `filters.resultsPerPage` is not set.
     * @param filters Optional filters: site, exactTerms, page, resultsPerPage,
     *                language, dateRestrict, resultType, sort.
     * @returns `{ params, page, resultsPerPage }` where `page` is an integer >= 1
     *          and `resultsPerPage` is an integer in 1..10.
     */
    buildSearchParams(query, numResults, filters) {
        let formattedQuery = query;
        // Apply site filter if provided
        if (filters?.site) {
            formattedQuery += ` site:${filters.site}`;
        }
        // Apply exact terms if provided
        if (filters?.exactTerms) {
            formattedQuery += ` "${filters.exactTerms}"`;
        }
        // Page must be a positive integer; anything else falls back to page 1.
        const requestedPage = Math.floor(Number(filters?.page));
        const page = Number.isFinite(requestedPage) && requestedPage > 0 ? requestedPage : 1;
        // Clamp the page size to 1..10 so a zero/negative/fractional request
        // never reaches the API as an invalid `num`.
        const requestedSize = Number(filters?.resultsPerPage || numResults);
        const resultsPerPage = Number.isNaN(requestedSize)
            ? 5
            : Math.min(Math.max(Math.floor(requestedSize), 1), 10);
        // Apply result type filter if provided
        let searchType;
        if (filters?.resultType) {
            switch (filters.resultType.toLowerCase()) {
                case 'image':
                case 'images':
                    searchType = 'image';
                    break;
                case 'news':
                    // For news, we need to modify the query
                    formattedQuery += ' source:news';
                    break;
                case 'video':
                case 'videos':
                    // For videos, we can use a more specific filter
                    formattedQuery += ' filetype:video OR inurl:video OR inurl:watch';
                    break;
            }
        }
        const params = {
            cx: this.searchEngineId,
            q: formattedQuery,
            num: resultsPerPage,
            // Calculate start index for pagination (Google uses 1-based indexing)
            start: (page - 1) * resultsPerPage + 1
        };
        if (searchType) {
            params.searchType = searchType;
        }
        // Apply language filter if provided
        if (filters?.language) {
            params.lr = `lang_${filters.language}`;
        }
        // Apply date restriction if provided
        if (filters?.dateRestrict) {
            params.dateRestrict = filters.dateRestrict;
        }
        // Only 'date' needs an explicit sort; any other value keeps the API default.
        if (filters?.sort && filters.sort.toLowerCase() === 'date') {
            params.sort = 'date';
        }
        return { params, page, resultsPerPage };
    }

    /**
     * Run a search, serving from the cache when a fresh entry exists.
     *
     * @param query The raw query string.
     * @param numResults Page size used when `filters.resultsPerPage` is absent (default 5).
     * @param filters Optional filters, see buildSearchParams.
     * @returns `{ results, pagination, categories }`. When the response has no
     *          `items`, `results` and `categories` are empty and the totals are 0.
     * @throws {Error} Any failure is rethrown as 'Google Search API error: ...'
     *                 with the original error attached as `cause`.
     */
    async search(query, numResults = 5, filters) {
        try {
            // Check cache first
            const cacheKey = this.generateCacheKey(query, numResults, filters);
            const cachedResult = this.searchCache.get(cacheKey);
            if (cachedResult && this.isCacheValid(cachedResult)) {
                console.error('Using cached search results');
                return cachedResult.data;
            }
            const { params, page, resultsPerPage } = this.buildSearchParams(query, numResults, filters);
            const response = await this.customSearch.list(params);
            const items = response.data.items;
            // Map the search results and categorize them
            const results = (items ?? []).map(item => {
                const result = {
                    title: item.title || '',
                    link: item.link || '',
                    snippet: item.snippet || '',
                    pagemap: item.pagemap || {},
                    datePublished: item.pagemap?.metatags?.[0]?.['article:published_time'] || '',
                    source: 'google_search'
                };
                // Add category to the result
                result.category = this.categorizeResult(result);
                return result;
            });
            // Generate category statistics
            const categories = this.generateCategoryStats(results);
            // A response without items is reported as zero results regardless of
            // what searchInformation says.
            const totalResults = items
                ? parseInt(response.data.searchInformation?.totalResults || '0', 10)
                : 0;
            const totalPages = Math.ceil(totalResults / resultsPerPage);
            // NOTE(review): the Custom Search JSON API reportedly serves only the
            // first 100 results, so hasNextPage may be true for pages the API
            // will reject - confirm against the API reference and cap if needed.
            const pagination = {
                currentPage: page,
                resultsPerPage,
                totalResults,
                totalPages,
                hasNextPage: page < totalPages,
                hasPreviousPage: page > 1
            };
            // Cache the results before returning (empty result sets included, so
            // repeated no-hit queries do not spend API quota).
            this.cacheSearchResults(cacheKey, results, pagination, categories);
            return {
                results,
                pagination,
                categories
            };
        }
        catch (error) {
            if (error instanceof Error) {
                // Keep the original error reachable for debugging.
                throw new Error(`Google Search API error: ${error.message}`, { cause: error });
            }
            throw new Error('Unknown error during Google search');
        }
    }

    /**
     * Categorizes a search result based on its content
     * @param result The search result to categorize; reads `link` and `title`
     * @returns The category name: one of the fixed categories below, the
     *          capitalised second-to-last hostname label, or 'Other' when the
     *          link cannot be parsed or has a single-label hostname
     */
    categorizeResult(result) {
        try {
            // Extract the domain from the URL
            const domain = new URL(result.link).hostname.replace(/^www\./, '');
            const title = typeof result.title === 'string' ? result.title : '';
            // Check if this is a social media site
            if (/facebook\.com|twitter\.com|instagram\.com|linkedin\.com|pinterest\.com|tiktok\.com|reddit\.com/i.test(domain)) {
                return 'Social Media';
            }
            // Check if this is a video site
            if (/youtube\.com|vimeo\.com|dailymotion\.com|twitch\.tv/i.test(domain)) {
                return 'Video';
            }
            // Check if this is a news site
            if (/news|cnn\.com|bbc\.com|nytimes\.com|wsj\.com|reuters\.com|bloomberg\.com/i.test(domain)) {
                return 'News';
            }
            // Check if this is an educational site
            if (/\.edu$|wikipedia\.org|khan|course|learn|study|academic/i.test(domain)) {
                return 'Educational';
            }
            // Check if this is a documentation site
            if (/docs|documentation|developer|github\.com|gitlab\.com|bitbucket\.org|stackoverflow\.com/i.test(domain) ||
                /docs|documentation|api|reference|manual/i.test(title)) {
                return 'Documentation';
            }
            // Check if this is a shopping site
            if (/amazon\.com|ebay\.com|etsy\.com|walmart\.com|shop|store|buy/i.test(domain)) {
                return 'Shopping';
            }
            // Default category based on domain: second-to-last label, capitalised
            const labels = domain.split('.');
            const base = labels[labels.length - 2];
            if (!base) {
                // Single-label hosts (e.g. "localhost") have no such label.
                return 'Other';
            }
            return base.charAt(0).toUpperCase() + base.slice(1);
        }
        catch {
            // Categorisation is best-effort: an unparsable link must not fail the search.
            return 'Other';
        }
    }

    /**
     * Generates category statistics from search results
     * @param results The search results to analyze; a missing category counts as 'Other'
     * @returns An array of `{ name, count }` objects sorted by count, descending
     */
    generateCategoryStats(results) {
        // A Map (not a plain object) so names such as "toString" or "__proto__"
        // cannot collide with inherited Object.prototype properties.
        const categoryCounts = new Map();
        for (const result of results) {
            const category = result.category || 'Other';
            categoryCounts.set(category, (categoryCounts.get(category) ?? 0) + 1);
        }
        return Array.from(categoryCounts, ([name, count]) => ({ name, count }))
            .sort((a, b) => b.count - a.count); // Sort by count in descending order
    }
}
|
@@ -0,0 +1 @@
|
|
1
|
+
export {};
|