@humbletoes/google-search 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +7 -0
- package/README.md +339 -0
- package/bin/google-search +3 -0
- package/bin/google-search-mcp +3 -0
- package/bin/google-search-mcp.cmd +2 -0
- package/bin/google-search.cmd +2 -0
- package/dist/browser-config.d.ts +41 -0
- package/dist/browser-config.js +96 -0
- package/dist/browser-config.js.map +1 -0
- package/dist/browser-pool.d.ts +13 -0
- package/dist/browser-pool.js +37 -0
- package/dist/browser-pool.js.map +1 -0
- package/dist/cache.d.ts +48 -0
- package/dist/cache.js +111 -0
- package/dist/cache.js.map +1 -0
- package/dist/errors.d.ts +26 -0
- package/dist/errors.js +48 -0
- package/dist/errors.js.map +1 -0
- package/dist/filters.d.ts +48 -0
- package/dist/filters.js +192 -0
- package/dist/filters.js.map +1 -0
- package/dist/html-cleaner.d.ts +62 -0
- package/dist/html-cleaner.js +236 -0
- package/dist/html-cleaner.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +2 -0
- package/dist/logger.js +41 -0
- package/dist/logger.js.map +1 -0
- package/dist/mcp-server.d.ts +9 -0
- package/dist/mcp-server.js +822 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/search.d.ts +18 -0
- package/dist/search.js +1080 -0
- package/dist/search.js.map +1 -0
- package/dist/types.d.ts +67 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/validation.d.ts +6 -0
- package/dist/validation.js +23 -0
- package/dist/validation.js.map +1 -0
- package/dist/web-fetcher.d.ts +10 -0
- package/dist/web-fetcher.js +179 -0
- package/dist/web-fetcher.js.map +1 -0
- package/package.json +67 -0
- package/scripts/setup.js +53 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright (c) 2024 humbletoes
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
# Google Search MCP Server
|
|
2
|
+
|
|
3
|
+
A fast, reliable Google Search tool with Model Context Protocol (MCP) server integration. Bypasses anti-bot detection to provide real-time search capabilities to AI assistants.
|
|
4
|
+
|
|
5
|
+
[Star History](https://star-history.com/#web-agent-master/google-search&Date)
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Fast & Reliable**: Advanced anti-bot detection bypass with intelligent caching
|
|
10
|
+
- **MCP Integration**: Native support for Claude and other AI assistants
|
|
11
|
+
- **Metadata Rich**: Returns enhanced results with domain, position, snippet analysis
|
|
12
|
+
- **Browser State**: Automatic state management to minimize verification prompts
|
|
13
|
+
- **HTML Access**: Get raw search page HTML for debugging or analysis
|
|
14
|
+
- **Open Source**: Fully open source, no API keys required
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Install from source
|
|
20
|
+
git clone https://github.com/web-agent-master/google-search.git
|
|
21
|
+
cd google-search
|
|
22
|
+
# Install dependencies
|
|
23
|
+
npm install
|
|
24
|
+
# or use yarn
|
|
25
|
+
yarn
|
|
26
|
+
# or use pnpm
|
|
27
|
+
pnpm install
|
|
28
|
+
|
|
29
|
+
# Compile TypeScript code
|
|
30
|
+
npm run build
|
|
31
|
+
# or use yarn
|
|
32
|
+
yarn build
|
|
33
|
+
# or use pnpm
|
|
34
|
+
pnpm build
|
|
35
|
+
|
|
36
|
+
# Link package globally (required for MCP functionality)
|
|
37
|
+
npm link
|
|
38
|
+
# or use yarn
|
|
39
|
+
yarn link
|
|
40
|
+
# or use pnpm
|
|
41
|
+
pnpm link
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Windows Environment Special Notes
|
|
45
|
+
|
|
46
|
+
In the Windows environment, this tool has been specially adapted:
|
|
47
|
+
|
|
48
|
+
1. Provides `.cmd` files to ensure command-line tools work properly in Windows Command Prompt and PowerShell
|
|
49
|
+
2. Log files are stored in the system temporary directory, not the Unix/Linux `/tmp` directory
|
|
50
|
+
3. Adds Windows-specific process signal handling to ensure the server can shut down properly
|
|
51
|
+
4. Uses cross-platform file path handling, supporting Windows path separators
|
|
52
|
+
|
|
53
|
+
## Usage
|
|
54
|
+
|
|
55
|
+
### Command Line Tool
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# Use command line directly
|
|
59
|
+
google-search "search keywords"
|
|
60
|
+
|
|
61
|
+
# Use command line options
|
|
62
|
+
google-search --limit 5 --timeout 60000 --no-headless "search keywords"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# or use npx
|
|
66
|
+
npx google-search-cli "search keywords"
|
|
67
|
+
|
|
68
|
+
# Run in development mode
|
|
69
|
+
pnpm dev "search keywords"
|
|
70
|
+
|
|
71
|
+
# Run in debug mode (show browser interface)
|
|
72
|
+
pnpm debug "search keywords"
|
|
73
|
+
|
|
74
|
+
# Get raw HTML of search result page
|
|
75
|
+
google-search "search keywords" --get-html
|
|
76
|
+
|
|
77
|
+
# Get HTML and save to file
|
|
78
|
+
google-search "search keywords" --get-html --save-html
|
|
79
|
+
|
|
80
|
+
# Get HTML and save to specified file
|
|
81
|
+
google-search "search keywords" --get-html --save-html --html-output "./output.html"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
#### Command Line Options
|
|
85
|
+
|
|
86
|
+
- `-l, --limit <number>`: Result count limit (default: 10)
|
|
87
|
+
- `-t, --timeout <number>`: Timeout (milliseconds, default: 60000)
|
|
88
|
+
- `--no-headless`: Show browser interface (for debugging)
|
|
89
|
+
- `--remote-debugging-port <number>`: Enable remote debugging port (default: 9222)
|
|
90
|
+
- `--state-file <path>`: Browser state file path (default: ./browser-state.json)
|
|
91
|
+
- `--no-save-state`: Do not save browser state
|
|
92
|
+
- `--get-html`: Get raw HTML of search result page instead of parsing results
|
|
93
|
+
- `--save-html`: Save HTML to file (use with --get-html)
|
|
94
|
+
- `--html-output <path>`: Specify HTML output file path (use with --get-html and --save-html)
|
|
95
|
+
- `-V, --version`: Show version number
|
|
96
|
+
- `-h, --help`: Show help information
|
|
97
|
+
|
|
98
|
+
#### Output Example
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"query": "deepseek",
|
|
103
|
+
"results": [
|
|
104
|
+
{
|
|
105
|
+
"title": "DeepSeek",
|
|
106
|
+
"link": "https://www.deepseek.com/",
|
|
107
|
+
"snippet": "DeepSeek-R1 is now live and open source, rivaling OpenAI's Model o1. Available on web, app, and API. Click for details. Into ..."
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"title": "DeepSeek",
|
|
111
|
+
"link": "https://www.deepseek.com/",
|
|
112
|
+
"snippet": "DeepSeek-R1 is now live and open source, rivaling OpenAI's Model o1. Available on web, app, and API. Click for details. Into ..."
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
"title": "deepseek-ai/DeepSeek-V3",
|
|
116
|
+
"link": "https://github.com/deepseek-ai/DeepSeek-V3",
|
|
117
|
+
"snippet": "We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token."
|
|
118
|
+
}
|
|
119
|
+
// more results...
|
|
120
|
+
]
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
#### HTML Output Example
|
|
125
|
+
|
|
126
|
+
When using the `--get-html` option, the output will include information about the HTML content:
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"query": "playwright automation",
|
|
131
|
+
"url": "https://www.google.com/",
|
|
132
|
+
"originalHtmlLength": 1291733,
|
|
133
|
+
"cleanedHtmlLength": 456789,
|
|
134
|
+
"htmlPreview": "<!DOCTYPE html><html itemscope=\"\" itemtype=\"http://schema.org/SearchResultsPage\" lang=\"zh-CN\"><head><meta charset=\"UTF-8\"><meta content=\"dark light\" name=\"color-scheme\"><meta content=\"origin\" name=\"referrer\">..."
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
If the `--save-html` option is also used, the output will also include the saved HTML file path:
|
|
139
|
+
|
|
140
|
+
```json
|
|
141
|
+
{
|
|
142
|
+
"query": "playwright automation",
|
|
143
|
+
"url": "https://www.google.com/",
|
|
144
|
+
"originalHtmlLength": 1292241,
|
|
145
|
+
"cleanedHtmlLength": 458976,
|
|
146
|
+
"savedPath": "./google-search-html/playwright_automation-2025-04-06T03-30-06-852Z.html",
|
|
147
|
+
"screenshotPath": "./google-search-html/playwright_automation-2025-04-06T03-30-06-852Z.png",
|
|
148
|
+
"htmlPreview": "<!DOCTYPE html><html itemscope=\"\" itemtype=\"http://schema.org/SearchResultsPage\" lang=\"zh-CN\">..."
|
|
149
|
+
}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### MCP Server
|
|
153
|
+
|
|
154
|
+
This project provides Model Context Protocol (MCP) server functionality, allowing AI assistants like Claude to directly use Google search capabilities. MCP is an open protocol that enables AI assistants to securely access external tools and data.
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Build project
|
|
158
|
+
pnpm build
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
#### Integration with Claude Desktop
|
|
162
|
+
|
|
163
|
+
1. Edit Claude Desktop configuration file
|
|
164
|
+
- Mac: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
165
|
+
- Windows: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
166
|
+
- Usually located at `C:\Users\username\AppData\Roaming\Claude\claude_desktop_config.json`
|
|
167
|
+
- You can open this folder directly by entering `%APPDATA%\Claude` in the Windows Explorer address bar
|
|
168
|
+
|
|
169
|
+
2. Add server configuration and restart Claude
|
|
170
|
+
|
|
171
|
+
```json
|
|
172
|
+
{
|
|
173
|
+
"mcpServers": {
|
|
174
|
+
"google-search": {
|
|
175
|
+
"command": "npx",
|
|
176
|
+
"args": ["google-search-mcp"]
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
On Windows, you can also use one of the following configurations:
|
|
183
|
+
|
|
184
|
+
1. Use cmd.exe with npx:
|
|
185
|
+
|
|
186
|
+
```json
|
|
187
|
+
{
|
|
188
|
+
"mcpServers": {
|
|
189
|
+
"google-search": {
|
|
190
|
+
"command": "cmd.exe",
|
|
191
|
+
"args": ["/c", "npx", "google-search-mcp"]
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
2. Use node with full path (if the above method encounters issues, this is recommended):
|
|
198
|
+
|
|
199
|
+
```json
|
|
200
|
+
{
|
|
201
|
+
"mcpServers": {
|
|
202
|
+
"google-search": {
|
|
203
|
+
"command": "node",
|
|
204
|
+
"args": ["C:/your/path/google-search/dist/mcp-server.js"]
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Note: For the second method, you must replace `C:/your/path/google-search` with the actual full path where the google-search package is installed.
|
|
211
|
+
|
|
212
|
+
After integration, you can directly use search functions in Claude, such as "search for the latest AI research".
|
|
213
|
+
|
|
214
|
+
## MCP Tools
|
|
215
|
+
|
|
216
|
+
The server provides two powerful tools optimized for AI assistants:
|
|
217
|
+
|
|
218
|
+
### `google-search`
|
|
219
|
+
**Smart web search and content fetcher.** Automatically detects if input is a URL or search query.
|
|
220
|
+
|
|
221
|
+
#### Search Mode (query string)
|
|
222
|
+
Returns structured results with clickable links.
|
|
223
|
+
- Batch queries supported (array) for concurrent multi-topic research
|
|
224
|
+
- Default 20 results (max 100) with title, URL, snippet
|
|
225
|
+
- Use `condensed=true` for minimal token output
|
|
226
|
+
|
|
227
|
+
#### Fetch Mode (URL input)
|
|
228
|
+
Extracts clean text from webpage.
|
|
229
|
+
- Removes HTML/scripts/ads/navigation
|
|
230
|
+
- Use `maxContentLength` to limit output size
|
|
231
|
+
|
|
232
|
+
**Parameters:**
|
|
233
|
+
| Parameter | Type | Default | Description |
|
|
234
|
+
|-----------|------|---------|-------------|
|
|
235
|
+
| `query` | string \| string[] | required | Search query, URL to fetch, or array for batch |
|
|
236
|
+
| `limit` | number | 20 | Results per query (max: 100, search mode only) |
|
|
237
|
+
| `timeout` | number | 60000/30000 | Timeout in ms |
|
|
238
|
+
| `useCache` | boolean | true | Use cached results (search mode only) |
|
|
239
|
+
| `condensed` | boolean | false | Minimal output: title+URL only |
|
|
240
|
+
| `maxContentLength` | number | unlimited | Max chars for URL fetch content |
|
|
241
|
+
|
|
242
|
+
**Examples:**
|
|
243
|
+
```
|
|
244
|
+
"react hooks tutorial" → searches Google
|
|
245
|
+
"https://docs.python.org" → fetches page content
|
|
246
|
+
["typescript generics", "rust traits"] → concurrent batch search
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### `get_code_context`
|
|
250
|
+
**Search for programming documentation, code examples, and API references.**
|
|
251
|
+
|
|
252
|
+
Uses multiple sources including Context7 API for high-quality library documentation.
|
|
253
|
+
Optimized for finding up-to-date context for:
|
|
254
|
+
- Library/framework documentation
|
|
255
|
+
- API reference and usage patterns
|
|
256
|
+
- SDK integration guides
|
|
257
|
+
- Code snippets and best practices
|
|
258
|
+
|
|
259
|
+
Returns condensed code snippets and docs from authoritative sources like GitHub, Stack Overflow, and official documentation sites.
|
|
260
|
+
|
|
261
|
+
**Parameters:**
|
|
262
|
+
| Parameter | Type | Default | Description |
|
|
263
|
+
|-----------|------|---------|-------------|
|
|
264
|
+
| `query` | string | required | Programming topic, library, API, or code pattern |
|
|
265
|
+
| `maxResults` | number | 5 | Sources to search (max: 10) |
|
|
266
|
+
| `maxTokens` | number | 3000 | Approximate max output tokens |
|
|
267
|
+
|
|
268
|
+
**Examples:**
|
|
269
|
+
```
|
|
270
|
+
"React useState hook examples"
|
|
271
|
+
"Python pandas dataframe filtering"
|
|
272
|
+
"Next.js app router server actions"
|
|
273
|
+
"Express middleware authentication"
|
|
274
|
+
"Prisma ORM schema definition"
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
**Supported Libraries (auto-detected for enhanced search):**
|
|
278
|
+
React, Next.js, Vue, Angular, Svelte, Express, Django, Flask, FastAPI, Rust/Tokio, Go, TypeScript, Tailwind, Prisma, MongoDB, PostgreSQL, Redis, Docker, Kubernetes, AWS, Firebase, Supabase, Stripe, OpenAI, LangChain, and many more.
|
|
279
|
+
|
|
280
|
+
**Features:**
|
|
281
|
+
- Automatically targets official documentation sites
|
|
282
|
+
- Integrates with Context7 for high-quality library docs
|
|
283
|
+
- Prioritizes authoritative sources (GitHub, Stack Overflow, official docs)
|
|
284
|
+
- Extracts code-relevant content from pages
|
|
285
|
+
- Token-optimized output format
|
|
286
|
+
|
|
287
|
+
## Project Structure
|
|
288
|
+
|
|
289
|
+
```
|
|
290
|
+
google-search/
|
|
291
|
+
├── src/
|
|
292
|
+
│ ├── index.ts # CLI entry point
|
|
293
|
+
│ ├── search.ts # Core search logic with Playwright
|
|
294
|
+
│ ├── mcp-server.ts # MCP server implementation
|
|
295
|
+
│ ├── cache.ts # LRU cache for performance
|
|
296
|
+
│ ├── types.ts # TypeScript type definitions
|
|
297
|
+
│ ├── browser-pool.ts # Browser instance pooling
|
|
298
|
+
│ └── browser-config.ts # Anti-bot detection configuration
|
|
299
|
+
├── dist/ # Compiled JavaScript output
|
|
300
|
+
├── bin/ # Executable wrappers
|
|
301
|
+
└── test/ # Test files
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Tech Stack
|
|
305
|
+
|
|
306
|
+
- **TypeScript** - Type-safe development
|
|
307
|
+
- **Playwright** - Browser automation
|
|
308
|
+
- **MCP SDK** - Model Context Protocol implementation
|
|
309
|
+
- **Zod** - Schema validation
|
|
310
|
+
- **Pino** - Structured logging
|
|
311
|
+
|
|
312
|
+
## Development
|
|
313
|
+
|
|
314
|
+
```bash
|
|
315
|
+
# Install dependencies
|
|
316
|
+
pnpm install
|
|
317
|
+
|
|
318
|
+
# Build project
|
|
319
|
+
pnpm build
|
|
320
|
+
|
|
321
|
+
# Run search CLI
|
|
322
|
+
pnpm dev "search query"
|
|
323
|
+
|
|
324
|
+
# Run MCP server
|
|
325
|
+
pnpm mcp
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
## Performance
|
|
329
|
+
|
|
330
|
+
- **Caching**: Intelligent LRU cache with 5-minute TTL for repeated queries
|
|
331
|
+
- **Browser Pooling**: Reuses browser instances for faster subsequent searches
|
|
332
|
+
- **State Management**: Persists browser state to minimize verification challenges
|
|
333
|
+
|
|
334
|
+
## Notes
|
|
335
|
+
|
|
336
|
+
- For educational and research purposes
|
|
337
|
+
- Comply with Google's terms of service
|
|
338
|
+
- Avoid excessive request frequency
|
|
339
|
+
- Browser state file stored in home directory as `.google-search-browser-state.json`
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provides secure browser configuration arguments for anti-detection
|
|
3
|
+
*/
|
|
4
|
+
import { BrowserContextOptions } from "playwright";
|
|
5
|
+
/**
 * Pairs a device name with the Playwright browser-context options that
 * belong to it.
 *
 * - `deviceName`: the device's name as a plain string.
 * - `deviceConfig`: Playwright `BrowserContextOptions` for that device.
 *
 * NOTE(review): none of the other declarations visible in this file use this
 * interface (getRandomDeviceConfig returns a tuple instead) — confirm whether
 * it is still needed.
 */
export interface DeviceConfig {
|
|
6
|
+
deviceName: string;
|
|
7
|
+
deviceConfig: BrowserContextOptions;
|
|
8
|
+
}
|
|
9
|
+
export declare class SecureBrowserConfig {
|
|
10
|
+
/**
|
|
11
|
+
* Get secure browser launch arguments
|
|
12
|
+
* @param includeInsecure - Whether to include insecure flags (should be false for production)
|
|
13
|
+
* @returns Array of browser arguments
|
|
14
|
+
*/
|
|
15
|
+
static getArgs(includeInsecure?: boolean): string[];
|
|
16
|
+
/**
|
|
17
|
+
* Default browser arguments for search operations
|
|
18
|
+
* Uses secure defaults without sandbox disabling
|
|
19
|
+
*/
|
|
20
|
+
static getDefaultSearchArgs(): string[];
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Desktop device names for browser fingerprinting
|
|
24
|
+
*/
|
|
25
|
+
export declare const DEVICE_LIST: readonly ["Desktop Chrome", "Desktop Edge", "Desktop Firefox", "Desktop Safari"];
|
|
26
|
+
/**
|
|
27
|
+
* Google domains with English language parameters
|
|
28
|
+
*/
|
|
29
|
+
export declare const GOOGLE_DOMAINS: readonly ["https://www.google.com?hl=en&lr=lang_en", "https://www.google.co.uk?hl=en&lr=lang_en", "https://www.google.ca?hl=en&lr=lang_en", "https://www.google.com.au?hl=en&lr=lang_en"];
|
|
30
|
+
/**
|
|
31
|
+
* Get random device configuration from Playwright devices
|
|
32
|
+
* @returns Tuple of [deviceName, deviceConfig]
|
|
33
|
+
*/
|
|
34
|
+
export declare function getRandomDeviceConfig(): [string, BrowserContextOptions];
|
|
35
|
+
/**
|
|
36
|
+
* Get random delay between min and max milliseconds
|
|
37
|
+
* @param min - Minimum delay in milliseconds
|
|
38
|
+
* @param max - Maximum delay in milliseconds
|
|
39
|
+
* @returns Random delay in milliseconds
|
|
40
|
+
*/
|
|
41
|
+
export declare function getRandomDelay(min: number, max: number): number;
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provides secure browser configuration arguments for anti-detection
|
|
3
|
+
*/
|
|
4
|
+
import { devices } from "playwright";
|
|
5
|
+
export class SecureBrowserConfig {
    /**
     * Build the Chromium command-line arguments used when launching a browser.
     *
     * Two deliberate choices in the list:
     * - `--allow-running-insecure-content` is never emitted. Chromium treats
     *   it as a presence-style switch, so passing it with `=false` would most
     *   likely turn mixed content ON instead of off.
     * - All disabled features are passed in ONE `--disable-features` switch.
     *   Repeating the switch can let one occurrence shadow the other, so a
     *   single combined value is unambiguous.
     *
     * The sandbox-disabling switches are appended only when the environment
     * variable `GOOGLE_SEARCH_DISABLE_SANDBOX` is exactly `"true"`.
     *
     * NOTE(review): Chromium documents `--no-zygote` as needing `--no-sandbox`;
     * confirm the launcher supplies `--no-sandbox` when the environment
     * variable above is unset.
     *
     * @param includeInsecure - Whether insecure flags were requested (should be
     *   false for production). No extra flag is added; a warning is printed so
     *   the request is visible.
     * @returns Array of browser arguments (a fresh array on every call)
     */
    static getArgs(includeInsecure = false) {
        const args = [
            "--disable-blink-features=AutomationControlled",
            // Single combined switch; see the method comment.
            "--disable-features=IsolateOrigins,site-per-process,TranslateUI",
            "--disable-site-isolation-trials",
            "--disable-dev-shm-usage",
            "--disable-accelerated-2d-canvas",
            "--no-first-run",
            "--no-zygote",
            "--disable-gpu",
            "--hide-scrollbars",
            "--mute-audio",
            "--disable-background-networking",
            "--disable-background-timer-throttling",
            "--disable-backgrounding-occluded-windows",
            "--disable-breakpad",
            "--disable-component-extensions-with-background-pages",
            "--disable-extensions",
            "--disable-ipc-flooding-protection",
            "--disable-renderer-backgrounding",
            "--enable-features=NetworkService,NetworkServiceInProcess",
            "--force-color-profile=srgb",
            "--metrics-recording-only",
            "--disable-javascript-harmony-shipping",
        ];
        // Only add sandbox disabling if explicitly opted in via environment variable.
        // This is a security measure to prevent unauthorized sandbox disabling.
        if (process.env.GOOGLE_SEARCH_DISABLE_SANDBOX === "true") {
            args.push("--no-sandbox", "--disable-setuid-sandbox");
        }
        // Insecure flags should NEVER be enabled in production.
        // --disable-web-security is intentionally NOT included because it poses
        // significant security risks; the request is only surfaced as a warning.
        if (includeInsecure) {
            console.warn("WARNING: Insecure browser flags requested. This should only be used in development.");
        }
        return args;
    }
    /**
     * Default browser arguments for search operations.
     * Equivalent to `getArgs(false)`: no insecure flags are requested.
     *
     * @returns Array of browser arguments
     */
    static getDefaultSearchArgs() {
        return this.getArgs(false);
    }
}
|
|
61
|
+
/**
|
|
62
|
+
* Desktop device names for browser fingerprinting.
* getRandomDeviceConfig() picks one entry at random and uses it as the key
* into Playwright's `devices` map, so every name listed here is expected to
* be a key of that map.
|
|
63
|
+
*/
|
|
64
|
+
export const DEVICE_LIST = [
|
|
65
|
+
"Desktop Chrome",
|
|
66
|
+
"Desktop Edge",
|
|
67
|
+
"Desktop Firefox",
|
|
68
|
+
"Desktop Safari",
|
|
69
|
+
];
|
|
70
|
+
/**
|
|
71
|
+
* Google domains with English language parameters.
* Each entry is a Google home-page URL (.com, .co.uk, .ca, .com.au) that
* already carries the query string `hl=en&lr=lang_en`.
* NOTE(review): the entries have no path before `?`; how callers select an
* entry or extend the URL is not visible here — verify before appending a path.
|
|
72
|
+
*/
|
|
73
|
+
export const GOOGLE_DOMAINS = [
|
|
74
|
+
"https://www.google.com?hl=en&lr=lang_en",
|
|
75
|
+
"https://www.google.co.uk?hl=en&lr=lang_en",
|
|
76
|
+
"https://www.google.ca?hl=en&lr=lang_en",
|
|
77
|
+
"https://www.google.com.au?hl=en&lr=lang_en",
|
|
78
|
+
];
|
|
79
|
+
/**
 * Pick one entry of DEVICE_LIST uniformly at random and look up its
 * descriptor in Playwright's `devices` map.
 * @returns Tuple of [deviceName, deviceConfig]
 */
export function getRandomDeviceConfig() {
    const index = Math.floor(Math.random() * DEVICE_LIST.length);
    const deviceName = DEVICE_LIST[index];
    const deviceConfig = devices[deviceName];
    return [deviceName, deviceConfig];
}
|
|
87
|
+
/**
 * Draw a random delay from the inclusive range [min, max].
 * For integer bounds the result is an integer.
 * @param min - Minimum delay in milliseconds
 * @param max - Maximum delay in milliseconds
 * @returns Random delay in milliseconds
 */
export function getRandomDelay(min, max) {
    // +1 makes the upper bound inclusive.
    const span = max - min + 1;
    const offset = Math.floor(Math.random() * span);
    return offset + min;
}
|
|
96
|
+
//# sourceMappingURL=browser-config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"browser-config.js","sourceRoot":"","sources":["../src/browser-config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAyB,MAAM,YAAY,CAAC;AAO5D,MAAM,OAAO,mBAAmB;IAC9B;;;;OAIG;IACH,MAAM,CAAC,OAAO,CAAC,kBAA2B,KAAK;QAC7C,MAAM,IAAI,GAAG;YACX,+CAA+C;YAC/C,oDAAoD;YACpD,iCAAiC;YACjC,yBAAyB;YACzB,iCAAiC;YACjC,gBAAgB;YAChB,aAAa;YACb,eAAe;YACf,mBAAmB;YACnB,cAAc;YACd,iCAAiC;YACjC,uCAAuC;YACvC,0CAA0C;YAC1C,oBAAoB;YACpB,sDAAsD;YACtD,sBAAsB;YACtB,gCAAgC;YAChC,mCAAmC;YACnC,kCAAkC;YAClC,0DAA0D;YAC1D,4BAA4B;YAC5B,0BAA0B;YAC1B,wCAAwC;YACxC,uCAAuC;SACxC,CAAC;QAEF,6EAA6E;QAC7E,uEAAuE;QACvE,IAAI,OAAO,CAAC,GAAG,CAAC,6BAA6B,KAAK,MAAM,EAAE,CAAC;YACzD,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAC1B,IAAI,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QACxC,CAAC;QAED,sDAAsD;QACtD,0DAA0D;QAC1D,IAAI,eAAe,EAAE,CAAC;YACpB,6DAA6D;YAC7D,yCAAyC;YACzC,OAAO,CAAC,IAAI,CACV,qFAAqF,CACtF,CAAC;QACJ,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,oBAAoB;QACzB,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;CACF;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB,gBAAgB;IAChB,cAAc;IACd,iBAAiB;IACjB,gBAAgB;CACR,CAAC;AAEX;;GAEG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG;IAC5B,yCAAyC;IACzC,2CAA2C;IAC3C,wCAAwC;IACxC,4CAA4C;CACpC,CAAC;AAEX;;;GAGG;AACH,MAAM,UAAU,qBAAqB;IACnC,MAAM,YAAY,GAChB,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC;IAC9D,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC;AAC/C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW,EAAE,GAAW;IACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;AAC3D,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Browser } from "playwright";
|
|
2
|
+
/**
|
|
3
|
+
* Manages a pool of browser instances for efficient reuse.
* Idle browsers are kept in an internal list so that a later acquire() can
* reuse one instead of launching a new browser.
|
|
4
|
+
*/
|
|
5
|
+
export declare class BrowserPool {
|
|
6
|
+
/** Idle browsers currently held for reuse. */
private pool;
|
|
7
|
+
/** Maximum number of idle browsers the pool keeps. */
private maxSize;
|
|
8
|
+
/**
 * @param maxSize - Maximum number of idle browsers to keep (the implementation defaults to 3)
 */
constructor(maxSize?: number);
|
|
9
|
+
/**
 * Take a browser from the pool, or launch a new headless Chromium browser
 * when the pool is empty.
 * @returns The browser to use; hand it back with release() when done
 */
acquire(): Promise<Browser>;
|
|
10
|
+
/**
 * Hand a browser back. It is kept for reuse while the pool holds fewer than
 * maxSize browsers; otherwise it is closed.
 * @param browser - A browser previously obtained from acquire()
 */
release(browser: Browser): Promise<void>;
|
|
11
|
+
/**
 * Close every idle browser held by the pool (close errors are ignored) and
 * empty the pool.
 */
cleanup(): Promise<void>;
|
|
12
|
+
}
|
|
13
|
+
export declare const browserPool: BrowserPool;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { chromium } from "playwright";
|
|
2
|
+
import { SecureBrowserConfig } from "./browser-config.js";
|
|
3
|
+
/**
|
|
4
|
+
* Manages a pool of browser instances for efficient reuse
|
|
5
|
+
*/
|
|
6
|
+
export class BrowserPool {
|
|
7
|
+
constructor(maxSize = 3) {
|
|
8
|
+
this.pool = [];
|
|
9
|
+
this.maxSize = maxSize;
|
|
10
|
+
}
|
|
11
|
+
async acquire() {
|
|
12
|
+
if (this.pool.length > 0) {
|
|
13
|
+
return this.pool.pop();
|
|
14
|
+
}
|
|
15
|
+
return await chromium.launch({
|
|
16
|
+
headless: true,
|
|
17
|
+
args: SecureBrowserConfig.getArgs(),
|
|
18
|
+
ignoreDefaultArgs: ["--enable-automation"],
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
async release(browser) {
|
|
22
|
+
if (this.pool.length < this.maxSize) {
|
|
23
|
+
// Reset browser state before returning to pool
|
|
24
|
+
this.pool.push(browser);
|
|
25
|
+
}
|
|
26
|
+
else {
|
|
27
|
+
await browser.close();
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
async cleanup() {
|
|
31
|
+
await Promise.all(this.pool.map(browser => browser.close().catch(() => { })));
|
|
32
|
+
this.pool = [];
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
// Singleton instance
|
|
36
|
+
export const browserPool = new BrowserPool();
|
|
37
|
+
//# sourceMappingURL=browser-pool.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"browser-pool.js","sourceRoot":"","sources":["../src/browser-pool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAW,MAAM,YAAY,CAAC;AAC/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAE1D;;GAEG;AACH,MAAM,OAAO,WAAW;IAItB,YAAY,UAAkB,CAAC;QAHvB,SAAI,GAAc,EAAE,CAAC;QAI3B,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAG,CAAC;QAC1B,CAAC;QACD,OAAO,MAAM,QAAQ,CAAC,MAAM,CAAC;YAC3B,QAAQ,EAAE,IAAI;YACd,IAAI,EAAE,mBAAmB,CAAC,OAAO,EAAE;YACnC,iBAAiB,EAAE,CAAC,qBAAqB,CAAC;SAC3C,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,OAAgB;QAC5B,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;YACpC,+CAA+C;YAC/C,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC1B,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,OAAO;QACX,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7E,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC;IACjB,CAAC;CACF;AAED,qBAAqB;AACrB,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC"}
|
package/dist/cache.d.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Simple LRU cache implementation for search results
|
|
3
|
+
* Reduces redundant searches and improves performance
|
|
4
|
+
*/
|
|
5
|
+
import { SearchResponse } from "./types.js";
|
|
6
|
+
export declare class SearchCache {
|
|
7
|
+
private cache;
|
|
8
|
+
private maxSize;
|
|
9
|
+
private ttl;
|
|
10
|
+
private totalHits;
|
|
11
|
+
private totalMisses;
|
|
12
|
+
constructor(maxSize?: number, ttl?: number);
|
|
13
|
+
/**
|
|
14
|
+
* Generate cache key from query and options
|
|
15
|
+
*/
|
|
16
|
+
private generateKey;
|
|
17
|
+
/**
|
|
18
|
+
* Get cached result if valid
|
|
19
|
+
*/
|
|
20
|
+
get(query: string, limit?: number, ttl?: number): SearchResponse | null;
|
|
21
|
+
/**
|
|
22
|
+
* Store result in cache
|
|
23
|
+
*/
|
|
24
|
+
set(query: string, data: SearchResponse, limit?: number, ttl?: number): void;
|
|
25
|
+
/**
|
|
26
|
+
* Clear all cache entries
|
|
27
|
+
*/
|
|
28
|
+
clear(): void;
|
|
29
|
+
/**
|
|
30
|
+
* Remove expired entries
|
|
31
|
+
*/
|
|
32
|
+
cleanup(): void;
|
|
33
|
+
/**
|
|
34
|
+
* Get cache statistics
|
|
35
|
+
*/
|
|
36
|
+
getStats(): {
|
|
37
|
+
size: number;
|
|
38
|
+
maxSize: number;
|
|
39
|
+
ttl: number;
|
|
40
|
+
hits: number;
|
|
41
|
+
misses: number;
|
|
42
|
+
entries: Array<{
|
|
43
|
+
key: string;
|
|
44
|
+
age: number;
|
|
45
|
+
hits: number;
|
|
46
|
+
}>;
|
|
47
|
+
};
|
|
48
|
+
}
|