@humbletoes/google-search 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +7 -0
- package/README.md +339 -0
- package/bin/google-search +3 -0
- package/bin/google-search-mcp +3 -0
- package/bin/google-search-mcp.cmd +2 -0
- package/bin/google-search.cmd +2 -0
- package/dist/browser-config.d.ts +41 -0
- package/dist/browser-config.js +96 -0
- package/dist/browser-config.js.map +1 -0
- package/dist/browser-pool.d.ts +13 -0
- package/dist/browser-pool.js +37 -0
- package/dist/browser-pool.js.map +1 -0
- package/dist/cache.d.ts +48 -0
- package/dist/cache.js +111 -0
- package/dist/cache.js.map +1 -0
- package/dist/errors.d.ts +26 -0
- package/dist/errors.js +48 -0
- package/dist/errors.js.map +1 -0
- package/dist/filters.d.ts +48 -0
- package/dist/filters.js +192 -0
- package/dist/filters.js.map +1 -0
- package/dist/html-cleaner.d.ts +62 -0
- package/dist/html-cleaner.js +236 -0
- package/dist/html-cleaner.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +2 -0
- package/dist/logger.js +41 -0
- package/dist/logger.js.map +1 -0
- package/dist/mcp-server.d.ts +9 -0
- package/dist/mcp-server.js +822 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/search.d.ts +18 -0
- package/dist/search.js +1080 -0
- package/dist/search.js.map +1 -0
- package/dist/types.d.ts +67 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/validation.d.ts +6 -0
- package/dist/validation.js +23 -0
- package/dist/validation.js.map +1 -0
- package/dist/web-fetcher.d.ts +10 -0
- package/dist/web-fetcher.js +179 -0
- package/dist/web-fetcher.js.map +1 -0
- package/package.json +67 -0
- package/scripts/setup.js +53 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
/**
 * Opening `<div>` tag whose `class` or `id` value contains an advertising
 * token (`ad`, `ads`, `advert*`, `adsbygoogle`, `adsense`) as a whole word
 * delimited by whitespace, `-` or `_`.
 *
 * A plain substring test for "ad" would also hit unrelated names such as
 * `header`, `shadow`, `loading` or `breadcrumb`.
 */
const AD_DIV_OPEN =
  /<div(?=[\s>])[^>]*?(?<![\w-])(?:class|id)="(?:[^"]*[\s_-])?(?:ads?|advert[a-z]*|adsbygoogle|adsense)(?:[\s_-][^"]*)?"[^>]*>/gi;

/** `class` attribute that contains `g` as a whole class token. */
const RESULT_CLASS_ATTR = /(?<![\w-])class="(?:[^"]*\s)?g(?:\s[^"]*)?"/i;

/** Opening `<div>` tag carrying the `g` class token. */
const RESULT_DIV_OPEN =
  /<div(?=[\s>])[^>]*?(?<![\w-])class="(?:[^"]*\s)?g(?:\s[^"]*)?"[^>]*>/gi;

/** Opening tags of the containers tried, in order, when narrowing to the results area. */
const RESULT_AREA_OPENERS = [
  /<div(?=[\s>])[^>]*?(?<![\w-])id="search"[^>]*>/i,
  /<div(?=[\s>])[^>]*?(?<![\w-])id="rso"[^>]*>/i,
  /<div(?=[\s>])[^>]*?(?<![\w-])class="[^"]*srg[^"]*"[^>]*>/i,
];

/**
 * Locate the `</div>` that closes a `<div>` whose content starts at
 * `contentStart`, counting nested `<div>` elements on the way.
 *
 * @param {string} html - Markup to scan.
 * @param {number} contentStart - Index just past the opening tag.
 * @returns {{contentEnd: number, blockEnd: number} | null} Index where the
 *   closing tag starts and index just past it, or `null` when the element is
 *   never closed.
 */
function findMatchingDivClose(html, contentStart) {
  const divTag = /<(\/?)div(?=[\s>])[^>]*>/gi;
  divTag.lastIndex = contentStart;
  let depth = 1;
  for (let tag = divTag.exec(html); tag !== null; tag = divTag.exec(html)) {
    depth += tag[1] === '/' ? -1 : 1;
    if (depth === 0) {
      return { contentEnd: tag.index, blockEnd: divTag.lastIndex };
    }
  }
  return null;
}

/**
 * Remove every `<div>` element (opening tag through its matching closing tag)
 * whose opening tag matches `openTagPattern`. Elements that are never closed
 * are left untouched.
 *
 * @param {string} html - Markup to filter.
 * @param {RegExp} openTagPattern - Pattern matching the opening tag.
 * @returns {string} Markup without the matched elements.
 */
function removeDivBlocks(html, openTagPattern) {
  // Work on a private copy so the shared pattern's lastIndex is never touched.
  const opener = new RegExp(openTagPattern.source, 'gi');
  let kept = '';
  let cursor = 0;
  for (let open = opener.exec(html); open !== null; open = opener.exec(html)) {
    const close = findMatchingDivClose(html, opener.lastIndex);
    if (close === null) {
      continue;
    }
    kept += html.slice(cursor, open.index);
    cursor = close.blockEnd;
    // Resume after the removed element so nested matches are not revisited.
    opener.lastIndex = close.blockEnd;
  }
  return kept + html.slice(cursor);
}

/**
 * Regex-based helpers for shrinking and analysing search result HTML.
 * All methods are static and operate on plain strings.
 */
export class HtmlCleaner {
  /**
   * Extract and clean search results HTML for optimal LLM consumption.
   *
   * Steps, in order: drop scripts, styles, inline `style` attributes,
   * `<noscript>` and comments; drop ad containers, iframes and `<ins>`;
   * drop `<nav>`, `<header>` and `<footer>`; replace images with a text
   * placeholder; collapse whitespace; drop empty elements; finally narrow
   * the output to the content of the first results container found
   * (`#search`, `#rso`, or a div whose class contains `srg`).
   *
   * @param {string} html - Raw page HTML.
   * @returns {string} Cleaned, trimmed HTML.
   */
  static extractSearchResults(html) {
    let cleaned = html
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
      .replace(/\s+style="[^"]*"/gi, '')
      .replace(/\s+style='[^']*'/gi, '')
      .replace(/<noscript[^>]*>[\s\S]*?<\/noscript>/gi, '')
      .replace(/<!--[\s\S]*?-->/g, '');

    // Ad containers are removed with balanced matching so nested <div>s
    // inside an ad do not leave a tail of ad content and stray closers.
    cleaned = removeDivBlocks(cleaned, AD_DIV_OPEN)
      .replace(/<iframe[^>]*>[\s\S]*?<\/iframe>/gi, '')
      .replace(/<ins[^>]*>[\s\S]*?<\/ins>/gi, '');

    // Navigation and page chrome that are not search results.
    cleaned = cleaned
      .replace(/<nav[^>]*>[\s\S]*?<\/nav>/gi, '')
      .replace(/<header[^>]*>[\s\S]*?<\/header>/gi, '')
      .replace(/<footer[^>]*>[\s\S]*?<\/footer>/gi, '');

    // Replace images with a placeholder, keeping non-empty alt text.
    cleaned = cleaned
      .replace(/<img[^>]*alt="([^"]*)"[^>]*>/gi, (_tag, alt) =>
        (alt.trim() ? ` [Image: ${alt}] ` : ' [Image] '))
      .replace(/<img[^>]*>/gi, ' [Image] ');

    // Collapse whitespace runs and whitespace between tags.
    cleaned = cleaned.replace(/\s+/g, ' ').replace(/>\s+</g, '><');

    // Remove empty elements. The closing tag must carry the same name as the
    // opening tag; otherwise a void element followed by its parent's closer
    // (e.g. `<br></p>`) would be deleted and unbalance the markup.
    cleaned = cleaned.replace(/<([a-z][\w-]*)(?:\s[^<>]*)?>\s*<\/\1\s*>/gi, '');

    // Narrow to the results area: take the full (nested) content of the
    // first container that is present, properly closed and non-empty.
    for (const openTagPattern of RESULT_AREA_OPENERS) {
      const open = openTagPattern.exec(cleaned);
      if (open === null) {
        continue;
      }
      const contentStart = open.index + open[0].length;
      const close = findMatchingDivClose(cleaned, contentStart);
      if (close === null) {
        continue;
      }
      const inner = cleaned.slice(contentStart, close.contentEnd);
      if (inner) {
        cleaned = inner;
        break;
      }
    }

    return cleaned.trim();
  }

  /**
   * Get comprehensive cleaning statistics.
   *
   * @param {string} originalHtml - HTML before cleaning.
   * @param {string} cleanedHtml - HTML after cleaning.
   * @returns {{originalSize: number, cleanedSize: number, reductionPercent: number,
   *   originalLines: number, cleanedLines: number, elementsRemoved: object,
   *   contentAnalysis: object}} Size figures, per-category counts of elements
   *   found in `originalHtml`, and an analysis of `cleanedHtml`.
   */
  static getCleaningStats(originalHtml, cleanedHtml) {
    const originalSize = originalHtml.length;
    const cleanedSize = cleanedHtml.length;
    const reductionPercent = originalSize > 0
      ? ((originalSize - cleanedSize) / originalSize) * 100
      : 0;

    const countIn = (text, pattern) => (text.match(pattern) || []).length;
    const originalLines = countIn(originalHtml, /\n/g) + 1;
    const cleanedLines = countIn(cleanedHtml, /\n/g) + 1;

    // Counts are taken from the original markup. Ads use the same token
    // rule as extractSearchResults so the two stay consistent.
    const elementsRemoved = {
      scripts: countIn(originalHtml, /<script[^>]*>[\s\S]*?<\/script>/gi),
      styles: countIn(originalHtml, /<style[^>]*>[\s\S]*?<\/style>/gi),
      images: countIn(originalHtml, /<img[^>]*>/gi),
      ads: countIn(originalHtml, AD_DIV_OPEN) +
        countIn(originalHtml, /<iframe[^>]*>/gi),
      navigation: countIn(originalHtml, /<nav[^>]*>/gi) +
        countIn(originalHtml, /<header[^>]*>/gi) +
        countIn(originalHtml, /<footer[^>]*>/gi),
      comments: countIn(originalHtml, /<!--[\s\S]*?-->/g),
    };

    const contentAnalysis = {
      hasSearchResults: this.detectSearchResults(cleanedHtml),
      resultCount: this.countSearchResults(cleanedHtml),
      hasRichSnippets: this.detectRichSnippets(cleanedHtml),
      hasAds: elementsRemoved.ads > 0,
      readabilityScore: this.calculateReadabilityScore(cleanedHtml),
    };

    return {
      originalSize,
      cleanedSize,
      reductionPercent,
      originalLines,
      cleanedLines,
      elementsRemoved,
      contentAnalysis,
    };
  }

  /**
   * Detect if HTML contains search results.
   *
   * @param {string} html - Markup to inspect.
   * @returns {boolean} True when any known result marker is present.
   */
  static detectSearchResults(html) {
    const resultIndicators = [
      // Whole-token match: a substring test for "g" hits almost any class.
      RESULT_CLASS_ATTR,
      /class="[^"]*result[^"]*"/i,
      /data-ved=/i,
      /class="[^"]*srg[^"]*"/i,
      /id="search"/i,
      /id="rso"/i,
    ];
    return resultIndicators.some((pattern) => pattern.test(html));
  }

  /**
   * Count approximate number of search results.
   *
   * @param {string} html - Markup to inspect.
   * @returns {number} The largest count produced by any single heuristic
   *   (divs with the `g` class token, divs with `data-ved`, `<h3>` tags).
   */
  static countSearchResults(html) {
    const resultPatterns = [
      RESULT_DIV_OPEN,
      /<div[^>]*data-ved="[^"]*"[^>]*>/gi,
      /<h3[^>]*>/gi,
    ];
    let maxCount = 0;
    for (const pattern of resultPatterns) {
      const matches = html.match(pattern);
      if (matches && matches.length > maxCount) {
        maxCount = matches.length;
      }
    }
    return maxCount;
  }

  /**
   * Detect rich snippets and structured data.
   *
   * @param {string} html - Markup to inspect.
   * @returns {boolean} True when any rich-snippet marker is present.
   */
  static detectRichSnippets(html) {
    const richSnippetIndicators = [
      /class="[^"]*kno-rdesc[^"]*"/i,
      /data-attrid=/i,
      /class="[^"]*featured[^"]*"/i,
      /class="[^"]*answer[^"]*"/i,
      /class="[^"]*knowledge[^"]*"/i,
      /class="[^"]*recipe[^"]*"/i,
      /class="[^"]*event[^"]*"/i,
      /class="[^"]*product[^"]*"/i,
      /class="[^"]*review[^"]*"/i,
    ];
    return richSnippetIndicators.some((pattern) => pattern.test(html));
  }

  /**
   * Calculate basic readability score.
   *
   * @param {string} html - Markup whose text content is scored.
   * @returns {number} Score in the range 0-100, where 100 is most readable;
   *   0 when the markup contains no text.
   */
  static calculateReadabilityScore(html) {
    // Strip tags and normalise whitespace to get the visible text.
    const textContent = html.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
    if (!textContent) {
      return 0;
    }
    const words = textContent.split(/\s+/).length;
    // Treat text without sentence punctuation as a single sentence.
    const sentences = (textContent.match(/[.!?]+/g) || []).length || 1;
    const avgWordsPerSentence = words / sentences;

    // Relative distance from the ideal sentence length (around 15-20 words,
    // midpoint 17.5); a smaller distance means better readability.
    const distance = Math.abs(avgWordsPerSentence - 17.5) / 17.5;

    // Invert and clamp so the result is 0-100 with 100 being most readable.
    return Math.max(0, Math.min(100, (1 - distance) * 100));
  }

  /**
   * Extract structured data from HTML (JSON-LD, microdata, etc.).
   *
   * @param {string} html - Raw page HTML (scripts must still be present).
   * @returns {Array<*>} Parsed JSON-LD payloads followed by one
   *   `{ type: 'microdata', schema }` entry per `itemtype` attribute.
   */
  static extractStructuredData(html) {
    const structuredData = [];

    // JSON-LD: the type attribute may use either quote style.
    const jsonLdPattern =
      /<script(?=[\s>])[^>]*?(?<![\w-])type=(["'])application\/ld\+json\1[^>]*>([\s\S]*?)<\/script>/gi;
    for (const [, , payload] of html.matchAll(jsonLdPattern)) {
      try {
        structuredData.push(JSON.parse(payload.trim()));
      } catch {
        // Best effort: blocks that are empty or not valid JSON are skipped.
      }
    }

    // Microdata: record the schema URL of every itemtype attribute.
    for (const [item] of html.matchAll(/<[^>]*itemtype="[^"]*"[^>]*>/gi)) {
      const typeMatch = item.match(/itemtype="([^"]*)"/i);
      if (typeMatch) {
        structuredData.push({
          type: 'microdata',
          schema: typeMatch[1],
        });
      }
    }

    return structuredData;
  }

  /**
   * Extract meta information from HTML.
   *
   * Keys are the lower-cased `name` (or, failing that, `property`) of each
   * `<meta>` tag that has a `content` attribute, plus `title` from the
   * `<title>` element and `canonical` from `<link rel="canonical">`.
   * Meta tags are applied after the title, so a meta tag named `title`
   * replaces the `<title>` text; `canonical` is applied last.
   *
   * @param {string} html - Raw page HTML.
   * @returns {Object<string, string>} Collected meta values.
   */
  static extractMetaInfo(html) {
    const meta = {};

    const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
    if (titleMatch) {
      meta.title = titleMatch[1].trim();
    }

    // The lookbehind keeps `data-name=` / `data-content=` from being read
    // as the real attribute.
    for (const [metaTag] of html.matchAll(/<meta[^>]*>/gi)) {
      const contentMatch = metaTag.match(/(?<![\w-])content="([^"]*)"/i);
      if (!contentMatch) {
        continue;
      }
      const keyMatch = metaTag.match(/(?<![\w-])name="([^"]*)"/i) ||
        metaTag.match(/(?<![\w-])property="([^"]*)"/i);
      const name = keyMatch ? keyMatch[1] : null;
      if (name) {
        meta[name.toLowerCase()] = contentMatch[1];
      }
    }

    // Canonical URL: attributes are read per tag so `href` may come before
    // or after `rel`.
    for (const [linkTag] of html.matchAll(/<link(?=[\s>/])[^>]*>/gi)) {
      if (!/(?<![\w-])rel="canonical"/i.test(linkTag)) {
        continue;
      }
      const hrefMatch = linkTag.match(/(?<![\w-])href="([^"]*)"/i);
      if (hrefMatch) {
        meta.canonical = hrefMatch[1];
        break;
      }
    }

    return meta;
  }
}
|
|
236
|
+
//# sourceMappingURL=html-cleaner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-cleaner.js","sourceRoot":"","sources":["../src/html-cleaner.ts"],"names":[],"mappings":"AAuBA,MAAM,OAAO,WAAW;IACtB;;OAEG;IACH,MAAM,CAAC,oBAAoB,CAAC,IAAY;QACtC,uCAAuC;QACvC,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;QAEpE,sCAAsC;QACtC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iCAAiC,EAAE,EAAE,CAAC,CAAC;QAEjE,uBAAuB;QACvB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC;QACpD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC;QAEpD,uBAAuB;QACvB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,uCAAuC,EAAE,EAAE,CAAC,CAAC;QAEvE,kBAAkB;QAClB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;QAElD,oCAAoC;QACpC,MAAM,WAAW,GAAG;YAClB,sDAAsD;YACtD,mDAAmD;YACnD,uDAAuD;YACvD,oDAAoD;YACpD,mCAAmC;YACnC,6BAA6B;SAC9B,CAAC;QAEF,WAAW,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;YAC7B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,mEAAmE;QACnE,MAAM,YAAY,GAAG;YACnB,6BAA6B;YAC7B,mCAAmC;YACnC,mCAAmC;SACpC,CAAC;QAEF,YAAY,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;YAC9B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,2CAA2C;QAC3C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,gCAAgC,EAAE,eAAe,CAAC,CAAC;QAC7E,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,cAAc,EAAE,WAAW,CAAC,CAAC;QAEvD,gCAAgC;QAChC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QACvC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;QAE1C,wBAAwB;QACxB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,uBAAuB,EAAE,EAAE,CAAC,CAAC;QAEvD,iEAAiE;QACjE,MAAM,cAAc,GAAG;YACrB,8CAA8C;YAC9C,2CAA2C;YAC3C,wDAAwD;SACzD,CAAC;QAEF,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;YACrC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACrC,IAAI,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACtB,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACnB,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,gBAAgB,CAAC,YAAoB,EAAE,WAAmB;QAC/D,MAAM,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC;QACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;QACvC,MAAM,gBAAgB,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,WAAW,CAAC,GAAG,
YAAY,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAEpG,MAAM,aAAa,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QACnE,MAAM,YAAY,GAAG,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QAEjE,yBAAyB;QACzB,MAAM,eAAe,GAAG;YACtB,OAAO,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,mCAAmC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;YAC/E,MAAM,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,iCAAiC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;YAC5E,MAAM,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;YACzD,GAAG,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,sDAAsD,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;gBACzF,CAAC,YAAY,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;YACzD,UAAU,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;gBACjD,CAAC,YAAY,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;gBACpD,CAAC,YAAY,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;YAChE,QAAQ,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM;SAChE,CAAC;QAEF,kBAAkB;QAClB,MAAM,eAAe,GAAG;YACtB,gBAAgB,EAAE,IAAI,CAAC,mBAAmB,CAAC,WAAW,CAAC;YACvD,WAAW,EAAE,IAAI,CAAC,kBAAkB,CAAC,WAAW,CAAC;YACjD,eAAe,EAAE,IAAI,CAAC,kBAAkB,CAAC,WAAW,CAAC;YACrD,MAAM,EAAE,eAAe,CAAC,GAAG,GAAG,CAAC;YAC/B,gBAAgB,EAAE,IAAI,CAAC,yBAAyB,CAAC,WAAW,CAAC;SAC9D,CAAC;QAEF,OAAO;YACL,YAAY;YACZ,WAAW;YACX,gBAAgB;YAChB,aAAa;YACb,YAAY;YACZ,eAAe;YACf,eAAe;SAChB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,mBAAmB,CAAC,IAAY;QAC7C,MAAM,gBAAgB,GAAG;YACvB,sBAAsB;YACtB,2BAA2B;YAC3B,YAAY;YACZ,wBAAwB;YACxB,cAAc;YACd,WAAW;SACZ,CAAC;QAEF,OAAO,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC9D,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,kBAAkB,CAAC,IAAY;QAC5C,0BAA0B;QAC1B,MAAM,cAAc,GAAG;YACrB,sCAAsC;YACtC,mCAAmC;YACnC,aAAa;SACd,CAAC;QAEF,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;YAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACpC,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;gBACzC,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;YAC5B,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;O
AEG;IACK,MAAM,CAAC,kBAAkB,CAAC,IAAY;QAC5C,MAAM,qBAAqB,GAAG;YAC5B,8BAA8B;YAC9B,eAAe;YACf,6BAA6B;YAC7B,2BAA2B;YAC3B,8BAA8B;YAC9B,2BAA2B;YAC3B,0BAA0B;YAC1B,4BAA4B;YAC5B,2BAA2B;SAC5B,CAAC;QAEF,OAAO,qBAAqB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACnE,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,yBAAyB,CAAC,IAAY;QACnD,uBAAuB;QACvB,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAE9E,IAAI,CAAC,WAAW;YAAE,OAAO,CAAC,CAAC;QAE3B,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QAC9C,MAAM,SAAS,GAAG,CAAC,WAAW,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;QACnE,MAAM,mBAAmB,GAAG,KAAK,GAAG,SAAS,CAAC;QAE9C,yDAAyD;QACzD,8DAA8D;QAC9D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,mBAAmB,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;QAE1D,uDAAuD;QACvD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;IACvD,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,qBAAqB,CAAC,IAAY;QACvC,MAAM,cAAc,GAAU,EAAE,CAAC;QAEjC,kBAAkB;QAClB,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,sEAAsE,CAAC,CAAC;QACzG,IAAI,aAAa,EAAE,CAAC;YAClB,aAAa,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;gBAC5B,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;gBACpE,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC9B,IAAI,CAAC;wBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;wBAC7C,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAC5B,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,qBAAqB;oBACvB,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,oBAAoB;QACpB,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpE,IAAI,cAAc,EAAE,CAAC;YACnB,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;gBAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;gBACpD,IAAI,SAAS,EAAE,CAAC;oBACd,cAAc,CAAC,IAAI,CAAC;wBAClB,IAAI,EAAE,WAAW;wBACjB,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;qBACrB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,OAAO,cAAc,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,eAAe,CAAC,IAAY;QAOjC,MAAM,IAAI,GAAQ,EAAE,CAAC;QAErB
,gBAAgB;QAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QAClE,IAAI,UAAU,EAAE,CAAC;YACf,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACpC,CAAC;QAED,oBAAoB;QACpB,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QAChD,IAAI,WAAW,EAAE,CAAC;YAChB,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;gBAC5B,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;gBACnD,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;gBAC3D,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;gBAEzD,IAAI,YAAY,EAAE,CAAC;oBACjB,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBAClF,IAAI,IAAI,EAAE,CAAC;wBACT,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;oBAC7C,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,wBAAwB;QACxB,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACnF,IAAI,cAAc,EAAE,CAAC;YACnB,IAAI,CAAC,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;QACrC,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;CACF"}
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import { googleSearch, getGoogleSearchPageHtml } from "./search.js";
|
|
4
|
+
// Get package information
|
|
5
|
+
import packageJson from "../package.json" with { type: "json" };
|
|
6
|
+
// Create command line program
const program = new Command();

/**
 * Option parser for integer-valued flags.
 *
 * Commander invokes a custom option parser as `parser(value, previousValue)`.
 * Handing it the global `parseInt` directly makes the option's default the
 * radix (e.g. `parseInt("5000", 30000)`), which returns NaN for any default
 * outside 2-36. Always parse as base 10 instead.
 *
 * @param {string} value - Raw option value from the command line.
 * @returns {number} The parsed integer (NaN when `value` is not numeric).
 */
const parseInteger = (value) => Number.parseInt(value, 10);

// Configure command line options
program
  .name("google-search")
  .description("Google search CLI tool based on Playwright")
  .version(packageJson.version)
  .argument("<query>", "search keywords")
  .option("-l, --limit <number>", "result count limit", parseInteger, 10)
  .option("-t, --timeout <number>", "timeout (milliseconds)", parseInteger, 30000)
  .option("--no-headless", "deprecated: now always tries headless mode first, automatically switches to headed mode if human verification is encountered")
  .option("--state-file <path>", "browser state file path", "./browser-state.json")
  .option("--no-save-state", "do not save browser state")
  .option("--get-html", "get raw HTML of search result page instead of parsing results")
  .option("--save-html", "save HTML to file")
  .option("--html-output <path>", "HTML output file path")
  .option("--ttl <number>", "cache TTL in milliseconds", parseInteger, 300000)
  .action(async (query, options) => {
    try {
      if (options.getHtml) {
        // Fetch the result page HTML instead of parsed results
        const htmlResult = await getGoogleSearchPageHtml(
          query,
          options,
          options.saveHtml ?? false,
          options.htmlOutput,
        );

        // If HTML is saved to file, include file path information in output
        if (options.saveHtml && htmlResult.savedPath) {
          console.log(`HTML saved to file: ${htmlResult.savedPath}`);
        }

        // Output a summary rather than the full HTML to avoid excessive console output
        const outputResult = {
          query: htmlResult.query,
          url: htmlResult.url,
          originalHtmlLength: htmlResult.originalHtmlLength, // Original HTML length (including CSS and JavaScript)
          cleanedHtmlLength: htmlResult.html.length, // Cleaned HTML length (excluding CSS and JavaScript)
          savedPath: htmlResult.savedPath,
          screenshotPath: htmlResult.screenshotPath, // Webpage screenshot save path
          // Only output the first 500 characters of HTML as preview
          htmlPreview: htmlResult.html.substring(0, 500) + (htmlResult.html.length > 500 ? '...' : ''),
        };
        console.log(JSON.stringify(outputResult, null, 2));
      } else {
        // Execute regular search and print the results as JSON
        const results = await googleSearch(query, options);
        console.log(JSON.stringify(results, null, 2));
      }
    } catch (error) {
      console.error("Error:", error);
      process.exit(1);
    }
  });

// Parse command line arguments. The action handler is async, so use
// parseAsync and handle a rejection explicitly instead of leaving the
// promise floating.
program.parseAsync(process.argv).catch((error) => {
  console.error("Error:", error);
  process.exit(1);
});
|
|
59
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,aAAa,CAAC;AAGpE,0BAA0B;AAC1B,OAAO,WAAW,MAAM,iBAAiB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AAEhE,8BAA8B;AAC9B,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,iCAAiC;AACjC,OAAO;KACJ,IAAI,CAAC,eAAe,CAAC;KACrB,WAAW,CAAC,4CAA4C,CAAC;KACzD,OAAO,CAAC,WAAW,CAAC,OAAO,CAAC;KAC5B,QAAQ,CAAC,SAAS,EAAE,iBAAiB,CAAC;KACtC,MAAM,CAAC,sBAAsB,EAAE,oBAAoB,EAAE,QAAQ,EAAE,EAAE,CAAC;KAClE,MAAM,CAAC,wBAAwB,EAAE,wBAAwB,EAAE,QAAQ,EAAE,KAAK,CAAC;KAC3E,MAAM,CAAC,eAAe,EAAE,8HAA8H,CAAC;KACvJ,MAAM,CAAC,qBAAqB,EAAE,yBAAyB,EAAE,sBAAsB,CAAC;KAChF,MAAM,CAAC,iBAAiB,EAAE,2BAA2B,CAAC;KACtD,MAAM,CAAC,YAAY,EAAE,+DAA+D,CAAC;KACrF,MAAM,CAAC,aAAa,EAAE,mBAAmB,CAAC;KAC1C,MAAM,CAAC,sBAAsB,EAAE,uBAAuB,CAAC;KACvD,MAAM,CAAC,gBAAgB,EAAE,2BAA2B,EAAE,QAAQ,EAAE,MAAM,CAAC;KACvE,MAAM,CAAC,KAAK,EAAE,KAAa,EAAE,OAAwF,EAAE,EAAE;IACxH,IAAI,CAAC;QACH,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YACpB,WAAW;YACX,MAAM,UAAU,GAAG,MAAM,uBAAuB,CAC9C,KAAK,EACL,OAAO,EACP,OAAO,CAAC,QAAQ,IAAI,KAAK,EACzB,OAAO,CAAC,UAAU,CACnB,CAAC;YAEF,oEAAoE;YACpE,IAAI,OAAO,CAAC,QAAQ,IAAI,UAAU,CAAC,SAAS,EAAE,CAAC;gBAC7C,OAAO,CAAC,GAAG,CAAC,uBAAuB,UAAU,CAAC,SAAS,EAAE,CAAC,CAAC;YAC7D,CAAC;YAED,+EAA+E;YAC/E,MAAM,YAAY,GAAG;gBACnB,KAAK,EAAE,UAAU,CAAC,KAAK;gBACvB,GAAG,EAAE,UAAU,CAAC,GAAG;gBACnB,kBAAkB,EAAE,UAAU,CAAC,kBAAkB,EAAE,sDAAsD;gBACzG,iBAAiB,EAAE,UAAU,CAAC,IAAI,CAAC,MAAM,EAAE,qDAAqD;gBAChG,SAAS,EAAE,UAAU,CAAC,SAAS;gBAC/B,cAAc,EAAE,UAAU,CAAC,cAAc,EAAE,+BAA+B;gBAC1E,0DAA0D;gBAC1D,WAAW,EAAE,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;aAC7F,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,yBAAyB;YACzB,MAAM,OAAO,GAAG,MAAM,YAAY,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;YAEnD,gBAAgB;YAChB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,
CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,+BAA+B;AAC/B,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
package/dist/logger.d.ts
ADDED
package/dist/logger.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { pino } from "pino";
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import * as fs from "fs";
|
|
4
|
+
import * as os from "os";
|
|
5
|
+
// Logs live under the OS temporary directory so the path works on every platform
const logDirectory = path.join(os.tmpdir(), "google-search-logs");
const logDirectoryMissing = !fs.existsSync(logDirectory);
if (logDirectoryMissing) {
  fs.mkdirSync(logDirectory, { recursive: true });
}

// Full path of the log file inside that directory
const logFile = path.join(logDirectory, "google-search.log");

// File-only transport: nothing is written to the console.
// This matters for the MCP server because stdout is used for JSON-RPC communication.
const fileTransport = {
  target: "pino/file",
  level: "trace",
  options: { destination: logFile },
};

const logger = pino({
  level: process.env.LOG_LEVEL || "info",
  transport: fileTransport,
});

// Note the shutdown in the log when the process exits
process.on("exit", () => {
  logger.info("Process exit, logging closed");
});

// Termination signals: log which signal arrived, then exit with status 0
const signalMessages = [
  ["SIGINT", "Received SIGINT signal, logging closed"],
  ["SIGTERM", "Received SIGTERM signal, logging closed"],
];
for (const [signal, message] of signalMessages) {
  process.on(signal, () => {
    logger.info(message);
    process.exit(0);
  });
}

// Uncaught exceptions are logged with the error attached, then the process exits with status 1
process.on("uncaughtException", (error) => {
  logger.error({ err: error }, "Uncaught exception");
  process.exit(1);
});

// Record log file location
logger.info({ logFilePath: logFile }, "Log file location");

export default logger;
|
|
41
|
+
//# sourceMappingURL=logger.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AAEzB,wEAAwE;AACxE,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,oBAAoB,CAAC,CAAC;AAC5D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;IAC3B,EAAE,CAAC,SAAS,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;AAC5C,CAAC;AAED,uBAAuB;AACvB,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAC;AAE3D,oEAAoE;AACpE,mFAAmF;AACnF,MAAM,MAAM,GAAG,IAAI,CAAC;IAClB,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,MAAM;IACtC,SAAS,EAAE;QACT,MAAM,EAAE,WAAW;QACnB,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,EAAE,WAAW,EAAE,WAAW,EAAE;KACtC;CACF,CAAC,CAAC;AAEH,gCAAgC;AAChC,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;IACtB,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;AAC9C,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;IACxB,MAAM,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE;IACzB,MAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,KAAK,EAAE,EAAE;IACxC,MAAM,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,oBAAoB,CAAC,CAAC;IACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,2BAA2B;AAC3B,MAAM,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,EAAE,mBAAmB,CAAC,CAAC;AAElD,eAAe,MAAM,CAAC"}
|