@humbletoes/google-search 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/LICENSE +7 -0
  2. package/README.md +339 -0
  3. package/bin/google-search +3 -0
  4. package/bin/google-search-mcp +3 -0
  5. package/bin/google-search-mcp.cmd +2 -0
  6. package/bin/google-search.cmd +2 -0
  7. package/dist/browser-config.d.ts +41 -0
  8. package/dist/browser-config.js +96 -0
  9. package/dist/browser-config.js.map +1 -0
  10. package/dist/browser-pool.d.ts +13 -0
  11. package/dist/browser-pool.js +37 -0
  12. package/dist/browser-pool.js.map +1 -0
  13. package/dist/cache.d.ts +48 -0
  14. package/dist/cache.js +111 -0
  15. package/dist/cache.js.map +1 -0
  16. package/dist/errors.d.ts +26 -0
  17. package/dist/errors.js +48 -0
  18. package/dist/errors.js.map +1 -0
  19. package/dist/filters.d.ts +48 -0
  20. package/dist/filters.js +192 -0
  21. package/dist/filters.js.map +1 -0
  22. package/dist/html-cleaner.d.ts +62 -0
  23. package/dist/html-cleaner.js +236 -0
  24. package/dist/html-cleaner.js.map +1 -0
  25. package/dist/index.d.ts +2 -0
  26. package/dist/index.js +59 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/logger.d.ts +2 -0
  29. package/dist/logger.js +41 -0
  30. package/dist/logger.js.map +1 -0
  31. package/dist/mcp-server.d.ts +9 -0
  32. package/dist/mcp-server.js +822 -0
  33. package/dist/mcp-server.js.map +1 -0
  34. package/dist/search.d.ts +18 -0
  35. package/dist/search.js +1080 -0
  36. package/dist/search.js.map +1 -0
  37. package/dist/types.d.ts +67 -0
  38. package/dist/types.js +2 -0
  39. package/dist/types.js.map +1 -0
  40. package/dist/validation.d.ts +6 -0
  41. package/dist/validation.js +23 -0
  42. package/dist/validation.js.map +1 -0
  43. package/dist/web-fetcher.d.ts +10 -0
  44. package/dist/web-fetcher.js +179 -0
  45. package/dist/web-fetcher.js.map +1 -0
  46. package/package.json +67 -0
  47. package/scripts/setup.js +53 -0
package/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2024 humbletoes
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,339 @@
1
+ # Google Search MCP Server
2
+
3
+ A fast, reliable Google Search tool with Model Context Protocol (MCP) server integration. Bypasses anti-bot detection to provide real-time search capabilities to AI assistants.
4
+
5
+ [![Star History Chart](https://api.star-history.com/svg?repos=web-agent-master/google-search&type=Date)](https://star-history.com/#web-agent-master/google-search&Date)
6
+
7
+ ## Features
8
+
9
+ - **Fast & Reliable**: Advanced anti-bot detection bypass with intelligent caching
10
+ - **MCP Integration**: Native support for Claude and other AI assistants
11
+ - **Metadata Rich**: Returns enhanced results with domain, position, snippet analysis
12
+ - **Browser State**: Automatic state management to minimize verification prompts
13
+ - **HTML Access**: Get raw search page HTML for debugging or analysis
14
+ - **Open Source**: Fully open source, no API keys required
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ # Install from source
20
+ git clone https://github.com/web-agent-master/google-search.git
21
+ cd google-search
22
+ # Install dependencies
23
+ npm install
24
+ # or use yarn
25
+ yarn
26
+ # or use pnpm
27
+ pnpm install
28
+
29
+ # Compile TypeScript code
30
+ npm run build
31
+ # or use yarn
32
+ yarn build
33
+ # or use pnpm
34
+ pnpm build
35
+
36
+ # Link package globally (required for MCP functionality)
37
+ npm link
38
+ # or use yarn
39
+ yarn link
40
+ # or use pnpm
41
+ pnpm link
42
+ ```
43
+
44
+ ### Windows Environment Special Notes
45
+
46
+ In the Windows environment, this tool has been specially adapted:
47
+
48
+ 1. Provides `.cmd` files to ensure command-line tools work properly in Windows Command Prompt and PowerShell
49
+ 2. Log files are stored in the system temporary directory, not the Unix/Linux `/tmp` directory
50
+ 3. Adds Windows-specific process signal handling to ensure the server can shut down properly
51
+ 4. Uses cross-platform file path handling, supporting Windows path separators
52
+
53
+ ## Usage
54
+
55
+ ### Command Line Tool
56
+
57
+ ```bash
58
+ # Use command line directly
59
+ google-search "search keywords"
60
+
61
+ # Use command line options
62
+ google-search --limit 5 --timeout 60000 --no-headless "search keywords"
63
+
64
+
65
+ # or use npx
66
+ npx google-search-cli "search keywords"
67
+
68
+ # Run in development mode
69
+ pnpm dev "search keywords"
70
+
71
+ # Run in debug mode (show browser interface)
72
+ pnpm debug "search keywords"
73
+
74
+ # Get raw HTML of search result page
75
+ google-search "search keywords" --get-html
76
+
77
+ # Get HTML and save to file
78
+ google-search "search keywords" --get-html --save-html
79
+
80
+ # Get HTML and save to specified file
81
+ google-search "search keywords" --get-html --save-html --html-output "./output.html"
82
+ ```
83
+
84
+ #### Command Line Options
85
+
86
+ - `-l, --limit <number>`: Result count limit (default: 10)
87
+ - `-t, --timeout <number>`: Timeout (milliseconds, default: 60000)
88
+ - `--no-headless`: Show browser interface (for debugging)
89
+ - `--remote-debugging-port <number>`: Enable remote debugging port (default: 9222)
90
+ - `--state-file <path>`: Browser state file path (default: ./browser-state.json)
91
+ - `--no-save-state`: Do not save browser state
92
+ - `--get-html`: Get raw HTML of search result page instead of parsing results
93
+ - `--save-html`: Save HTML to file (use with --get-html)
94
+ - `--html-output <path>`: Specify HTML output file path (use with --get-html and --save-html)
95
+ - `-V, --version`: Show version number
96
+ - `-h, --help`: Show help information
97
+
98
+ #### Output Example
99
+
100
+ ```json
101
+ {
102
+ "query": "deepseek",
103
+ "results": [
104
+ {
105
+ "title": "DeepSeek",
106
+ "link": "https://www.deepseek.com/",
107
+ "snippet": "DeepSeek-R1 is now live and open source, rivaling OpenAI's Model o1. Available on web, app, and API. Click for details. Into ..."
108
+ },
109
+ {
110
+ "title": "DeepSeek",
111
+ "link": "https://www.deepseek.com/",
112
+ "snippet": "DeepSeek-R1 is now live and open source, rivaling OpenAI's Model o1. Available on web, app, and API. Click for details. Into ..."
113
+ },
114
+ {
115
+ "title": "deepseek-ai/DeepSeek-V3",
116
+ "link": "https://github.com/deepseek-ai/DeepSeek-V3",
117
+ "snippet": "We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token."
118
+ }
119
+ // more results...
120
+ ]
121
+ }
122
+ ```
123
+
124
+ #### HTML Output Example
125
+
126
+ When using the `--get-html` option, the output will include information about the HTML content:
127
+
128
+ ```json
129
+ {
130
+ "query": "playwright automation",
131
+ "url": "https://www.google.com/",
132
+ "originalHtmlLength": 1291733,
133
+ "cleanedHtmlLength": 456789,
134
+ "htmlPreview": "<!DOCTYPE html><html itemscope=\"\" itemtype=\"http://schema.org/SearchResultsPage\" lang=\"zh-CN\"><head><meta charset=\"UTF-8\"><meta content=\"dark light\" name=\"color-scheme\"><meta content=\"origin\" name=\"referrer\">..."
135
+ }
136
+ ```
137
+
138
+ If the `--save-html` option is also used, the output will also include the saved HTML file path:
139
+
140
+ ```json
141
+ {
142
+ "query": "playwright automation",
143
+ "url": "https://www.google.com/",
144
+ "originalHtmlLength": 1292241,
145
+ "cleanedHtmlLength": 458976,
146
+ "savedPath": "./google-search-html/playwright_automation-2025-04-06T03-30-06-852Z.html",
147
+ "screenshotPath": "./google-search-html/playwright_automation-2025-04-06T03-30-06-852Z.png",
148
+ "htmlPreview": "<!DOCTYPE html><html itemscope=\"\" itemtype=\"http://schema.org/SearchResultsPage\" lang=\"zh-CN\">..."
149
+ }
150
+ ```
151
+
152
+ ### MCP Server
153
+
154
+ This project provides Model Context Protocol (MCP) server functionality, allowing AI assistants like Claude to directly use Google search capabilities. MCP is an open protocol that enables AI assistants to securely access external tools and data.
155
+
156
+ ```bash
157
+ # Build project
158
+ pnpm build
159
+ ```
160
+
161
+ #### Integration with Claude Desktop
162
+
163
+ 1. Edit Claude Desktop configuration file
164
+ - Mac: `~/Library/Application Support/Claude/claude_desktop_config.json`
165
+ - Windows: `%APPDATA%\Claude\claude_desktop_config.json`
166
+ - Usually located at `C:\Users\username\AppData\Roaming\Claude\claude_desktop_config.json`
167
+ - You can open this folder directly by entering `%APPDATA%\Claude` in the Windows Explorer address bar
168
+
169
+ 2. Add server configuration and restart Claude
170
+
171
+ ```json
172
+ {
173
+ "mcpServers": {
174
+ "google-search": {
175
+ "command": "npx",
176
+ "args": ["google-search-mcp"]
177
+ }
178
+ }
179
+ }
180
+ ```
181
+
182
+ On Windows, you can also use one of the following configurations:
183
+
184
+ 1. Use cmd.exe with npx:
185
+
186
+ ```json
187
+ {
188
+ "mcpServers": {
189
+ "google-search": {
190
+ "command": "cmd.exe",
191
+ "args": ["/c", "npx", "google-search-mcp"]
192
+ }
193
+ }
194
+ }
195
+ ```
196
+
197
+ 2. Use node with the full path to the server script (recommended if the method above does not work):
198
+
199
+ ```json
200
+ {
201
+ "mcpServers": {
202
+ "google-search": {
203
+ "command": "node",
204
+ "args": ["C:/your/path/google-search/dist/mcp-server.js"]
205
+ }
206
+ }
207
+ }
208
+ ```
209
+
210
+ Note: For the second method, you must replace `C:/your/path/google-search` with the actual full path where the google-search package is installed.
211
+
212
+ After integration, you can directly use search functions in Claude, such as "search for the latest AI research".
213
+
214
+ ## MCP Tools
215
+
216
+ The server provides two powerful tools optimized for AI assistants:
217
+
218
+ ### `google-search`
219
+ **Smart web search and content fetcher.** Automatically detects if input is a URL or search query.
220
+
221
+ #### Search Mode (query string)
222
+ Returns structured results with clickable links.
223
+ - Batch queries supported (array) for concurrent multi-topic research
224
+ - Default 20 results (max 100) with title, URL, snippet
225
+ - Use `condensed=true` for minimal token output
226
+
227
+ #### Fetch Mode (URL input)
228
+ Extracts clean text from webpage.
229
+ - Removes HTML/scripts/ads/navigation
230
+ - Use `maxContentLength` to limit output size
231
+
232
+ **Parameters:**
233
+ | Parameter | Type | Default | Description |
234
+ |-----------|------|---------|-------------|
235
+ | `query` | string \| string[] | required | Search query, URL to fetch, or array for batch |
236
+ | `limit` | number | 20 | Results per query (max: 100, search mode only) |
237
+ | `timeout` | number | 60000/30000 | Timeout in ms |
238
+ | `useCache` | boolean | true | Use cached results (search mode only) |
239
+ | `condensed` | boolean | false | Minimal output: title+URL only |
240
+ | `maxContentLength` | number | unlimited | Max chars for URL fetch content |
241
+
242
+ **Examples:**
243
+ ```
244
+ "react hooks tutorial" → searches Google
245
+ "https://docs.python.org" → fetches page content
246
+ ["typescript generics", "rust traits"] → concurrent batch search
247
+ ```
248
+
249
+ ### `get_code_context`
250
+ **Search for programming documentation, code examples, and API references.**
251
+
252
+ Uses multiple sources including Context7 API for high-quality library documentation.
253
+ Optimized for finding up-to-date context for:
254
+ - Library/framework documentation
255
+ - API reference and usage patterns
256
+ - SDK integration guides
257
+ - Code snippets and best practices
258
+
259
+ Returns condensed code snippets and docs from authoritative sources like GitHub, Stack Overflow, and official documentation sites.
260
+
261
+ **Parameters:**
262
+ | Parameter | Type | Default | Description |
263
+ |-----------|------|---------|-------------|
264
+ | `query` | string | required | Programming topic, library, API, or code pattern |
265
+ | `maxResults` | number | 5 | Sources to search (max: 10) |
266
+ | `maxTokens` | number | 3000 | Approximate max output tokens |
267
+
268
+ **Examples:**
269
+ ```
270
+ "React useState hook examples"
271
+ "Python pandas dataframe filtering"
272
+ "Next.js app router server actions"
273
+ "Express middleware authentication"
274
+ "Prisma ORM schema definition"
275
+ ```
276
+
277
+ **Supported Libraries (auto-detected for enhanced search):**
278
+ React, Next.js, Vue, Angular, Svelte, Express, Django, Flask, FastAPI, Rust/Tokio, Go, TypeScript, Tailwind, Prisma, MongoDB, PostgreSQL, Redis, Docker, Kubernetes, AWS, Firebase, Supabase, Stripe, OpenAI, LangChain, and many more.
279
+
280
+ **Features:**
281
+ - Automatically targets official documentation sites
282
+ - Integrates with Context7 for high-quality library docs
283
+ - Prioritizes authoritative sources (GitHub, Stack Overflow, official docs)
284
+ - Extracts code-relevant content from pages
285
+ - Token-optimized output format
286
+
287
+ ## Project Structure
288
+
289
+ ```
290
+ google-search/
291
+ ├── src/
292
+ │ ├── index.ts # CLI entry point
293
+ │ ├── search.ts # Core search logic with Playwright
294
+ │ ├── mcp-server.ts # MCP server implementation
295
+ │ ├── cache.ts # LRU cache for performance
296
+ │ ├── types.ts # TypeScript type definitions
297
+ │ ├── browser-pool.ts # Browser instance pooling
298
+ │ └── browser-config.ts # Anti-bot detection configuration
299
+ ├── dist/ # Compiled JavaScript output
300
+ ├── bin/ # Executable wrappers
301
+ └── test/ # Test files
302
+ ```
303
+
304
+ ## Tech Stack
305
+
306
+ - **TypeScript** - Type-safe development
307
+ - **Playwright** - Browser automation
308
+ - **MCP SDK** - Model Context Protocol implementation
309
+ - **Zod** - Schema validation
310
+ - **Pino** - Structured logging
311
+
312
+ ## Development
313
+
314
+ ```bash
315
+ # Install dependencies
316
+ pnpm install
317
+
318
+ # Build project
319
+ pnpm build
320
+
321
+ # Run search CLI
322
+ pnpm dev "search query"
323
+
324
+ # Run MCP server
325
+ pnpm mcp
326
+ ```
327
+
328
+ ## Performance
329
+
330
+ - **Caching**: Intelligent LRU cache with 5-minute TTL for repeated queries
331
+ - **Browser Pooling**: Reuses browser instances for faster subsequent searches
332
+ - **State Management**: Persists browser state to minimize verification challenges
333
+
334
+ ## Notes
335
+
336
+ - For educational and research purposes
337
+ - Comply with Google's terms of service
338
+ - Avoid excessive request frequency
339
+ - Browser state file stored in home directory as `.google-search-browser-state.json`
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+
3
+ import '../dist/index.js';
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+
3
+ import '../dist/mcp-server.js';
@@ -0,0 +1,2 @@
1
+ @echo off
2
+ node "%~dp0google-search-mcp" %*
@@ -0,0 +1,2 @@
1
+ @echo off
2
+ node "%~dp0google-search" %*
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Provides secure browser configuration arguments for anti-detection
3
+ */
4
+ import { BrowserContextOptions } from "playwright";
5
+ export interface DeviceConfig { // Pairs a device name with Playwright context options
6
+ deviceName: string; // presumably one of the DEVICE_LIST names — TODO confirm against callers
7
+ deviceConfig: BrowserContextOptions; // Playwright BrowserContextOptions associated with that device name
8
+ }
9
+ export declare class SecureBrowserConfig {
10
+ /**
11
+ * Build the list of browser launch arguments (a fixed base list plus optional sandbox-disabling flags).
12
+ * @param includeInsecure - Optional; the compiled implementation only logs a console warning when this is truthy and adds no extra flags.
13
+ * @returns Array of browser arguments; "--no-sandbox" and "--disable-setuid-sandbox" are appended when the GOOGLE_SEARCH_DISABLE_SANDBOX environment variable is "true".
14
+ */
15
+ static getArgs(includeInsecure?: boolean): string[];
16
+ /**
17
+ * Default browser arguments for search operations.
18
+ * Equivalent to getArgs(false); the sandbox flags are therefore still governed by GOOGLE_SEARCH_DISABLE_SANDBOX.
19
+ */
20
+ static getDefaultSearchArgs(): string[];
21
+ }
22
+ /**
23
+ * Desktop device names that getRandomDeviceConfig picks from; each is used as a key into Playwright's `devices` map.
24
+ */
25
+ export declare const DEVICE_LIST: readonly ["Desktop Chrome", "Desktop Edge", "Desktop Firefox", "Desktop Safari"];
26
+ /**
27
+ * Google base URLs (com, co.uk, ca, com.au), each with the English-language query parameters hl=en and lr=lang_en appended.
28
+ */
29
+ export declare const GOOGLE_DOMAINS: readonly ["https://www.google.com?hl=en&lr=lang_en", "https://www.google.co.uk?hl=en&lr=lang_en", "https://www.google.ca?hl=en&lr=lang_en", "https://www.google.com.au?hl=en&lr=lang_en"];
30
+ /**
31
+ * Pick one DEVICE_LIST entry at random (Math.random) and look up its entry in Playwright's `devices` map.
32
+ * @returns Tuple of [deviceName, deviceConfig]: the chosen name and `devices[deviceName]`
33
+ */
34
+ export declare function getRandomDeviceConfig(): [string, BrowserContextOptions];
35
+ /**
36
+ * Get a pseudo-random delay (Math.random) between min and max milliseconds; both bounds are reachable when they are integers.
37
+ * @param min - Minimum delay in milliseconds (not validated; the formula assumes min <= max)
38
+ * @param max - Maximum delay in milliseconds (inclusive for integer bounds)
39
+ * @returns Random delay in milliseconds, computed as floor(random * (max - min + 1)) + min
40
+ */
41
+ export declare function getRandomDelay(min: number, max: number): number;
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Provides secure browser configuration arguments for anti-detection
3
+ */
4
+ import { devices } from "playwright";
5
+ export class SecureBrowserConfig {
6
+ /**
7
+ * Build the list of browser launch arguments (a fixed base list plus optional sandbox-disabling flags).
8
+ * @param includeInsecure - When truthy, only emits a console warning; no additional flags are added by this method (defaults to false)
9
+ * @returns A newly built array of argument strings on every call
10
+ */
11
+ static getArgs(includeInsecure = false) {
12
+ const args = [
13
+ "--disable-blink-features=AutomationControlled",
14
+ "--disable-features=IsolateOrigins,site-per-process", // NOTE(review): a second --disable-features entry ("TranslateUI") appears further down; confirm the browser honours both rather than only the last one
15
+ "--disable-site-isolation-trials",
16
+ "--disable-dev-shm-usage",
17
+ "--disable-accelerated-2d-canvas",
18
+ "--no-first-run",
19
+ "--no-zygote",
20
+ "--disable-gpu",
21
+ "--hide-scrollbars",
22
+ "--mute-audio",
23
+ "--disable-background-networking",
24
+ "--disable-background-timer-throttling",
25
+ "--disable-backgrounding-occluded-windows",
26
+ "--disable-breakpad",
27
+ "--disable-component-extensions-with-background-pages",
28
+ "--disable-extensions",
29
+ "--disable-features=TranslateUI", // NOTE(review): second --disable-features entry (the first lists IsolateOrigins,site-per-process); consider merging them into a single flag in the TypeScript source
30
+ "--disable-ipc-flooding-protection",
31
+ "--disable-renderer-backgrounding",
32
+ "--enable-features=NetworkService,NetworkServiceInProcess",
33
+ "--force-color-profile=srgb",
34
+ "--metrics-recording-only",
35
+ "--allow-running-insecure-content=false", // NOTE(review): verify the browser parses "=false" here rather than treating the mere presence of the switch as "enabled"
36
+ "--disable-javascript-harmony-shipping",
37
+ ];
38
+ // Sandbox-disabling flags are appended only when GOOGLE_SEARCH_DISABLE_SANDBOX is exactly the string "true"
39
+ // (explicit opt-in; this method does not add them otherwise)
40
+ if (process.env.GOOGLE_SEARCH_DISABLE_SANDBOX === "true") {
41
+ args.push("--no-sandbox");
42
+ args.push("--disable-setuid-sandbox");
43
+ }
44
+ // includeInsecure does not add any flags; it only triggers the warning below
45
+ // Intended for specific development/testing scenarios, never production
46
+ if (includeInsecure) {
47
+ // Note: --disable-web-security is intentionally NOT included
48
+ // as it poses significant security risks
49
+ console.warn("WARNING: Insecure browser flags requested. This should only be used in development.");
50
+ }
51
+ return args;
52
+ }
53
+ /**
54
+ * Default browser arguments for search operations.
55
+ * Delegates to getArgs(false), so the sandbox flags are still added when GOOGLE_SEARCH_DISABLE_SANDBOX is "true".
56
+ */
57
+ static getDefaultSearchArgs() {
58
+ return this.getArgs(false);
59
+ }
60
+ }
61
+ /**
62
+ * Desktop device names that getRandomDeviceConfig chooses from; each is used as a key into Playwright's `devices` map.
63
+ */
64
+ export const DEVICE_LIST = [
65
+ "Desktop Chrome",
66
+ "Desktop Edge",
67
+ "Desktop Firefox",
68
+ "Desktop Safari",
69
+ ];
70
+ /**
71
+ * Google base URLs (com, co.uk, ca, com.au), each with the English-language query parameters hl=en and lr=lang_en appended.
72
+ */
73
+ export const GOOGLE_DOMAINS = [
74
+ "https://www.google.com?hl=en&lr=lang_en",
75
+ "https://www.google.co.uk?hl=en&lr=lang_en",
76
+ "https://www.google.ca?hl=en&lr=lang_en",
77
+ "https://www.google.com.au?hl=en&lr=lang_en",
78
+ ];
79
+ /**
80
+ * Pick one DEVICE_LIST entry at random (Math.random) and look up its entry in Playwright's `devices` map.
81
+ * @returns Tuple of [deviceName, deviceConfig]: the chosen name and `devices[deviceName]`
82
+ */
83
+ export function getRandomDeviceConfig() {
84
+ const randomDevice = DEVICE_LIST[Math.floor(Math.random() * DEVICE_LIST.length)]; // random index in [0, DEVICE_LIST.length)
85
+ return [randomDevice, devices[randomDevice]]; // name plus whatever Playwright's devices map holds under that name
86
+ }
87
+ /**
88
+ * Get a pseudo-random delay (Math.random) between min and max milliseconds; both bounds are reachable when they are integers.
89
+ * @param min - Minimum delay in milliseconds (not validated; the formula assumes min <= max)
90
+ * @param max - Maximum delay in milliseconds (inclusive for integer bounds)
91
+ * @returns Random delay in milliseconds
92
+ */
93
+ export function getRandomDelay(min, max) {
94
+ return Math.floor(Math.random() * (max - min + 1)) + min; // the +1 makes max itself reachable for integer bounds
95
+ }
96
+ //# sourceMappingURL=browser-config.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"browser-config.js","sourceRoot":"","sources":["../src/browser-config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAyB,MAAM,YAAY,CAAC;AAO5D,MAAM,OAAO,mBAAmB;IAC9B;;;;OAIG;IACH,MAAM,CAAC,OAAO,CAAC,kBAA2B,KAAK;QAC7C,MAAM,IAAI,GAAG;YACX,+CAA+C;YAC/C,oDAAoD;YACpD,iCAAiC;YACjC,yBAAyB;YACzB,iCAAiC;YACjC,gBAAgB;YAChB,aAAa;YACb,eAAe;YACf,mBAAmB;YACnB,cAAc;YACd,iCAAiC;YACjC,uCAAuC;YACvC,0CAA0C;YAC1C,oBAAoB;YACpB,sDAAsD;YACtD,sBAAsB;YACtB,gCAAgC;YAChC,mCAAmC;YACnC,kCAAkC;YAClC,0DAA0D;YAC1D,4BAA4B;YAC5B,0BAA0B;YAC1B,wCAAwC;YACxC,uCAAuC;SACxC,CAAC;QAEF,6EAA6E;QAC7E,uEAAuE;QACvE,IAAI,OAAO,CAAC,GAAG,CAAC,6BAA6B,KAAK,MAAM,EAAE,CAAC;YACzD,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAC1B,IAAI,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QACxC,CAAC;QAED,sDAAsD;QACtD,0DAA0D;QAC1D,IAAI,eAAe,EAAE,CAAC;YACpB,6DAA6D;YAC7D,yCAAyC;YACzC,OAAO,CAAC,IAAI,CACV,qFAAqF,CACtF,CAAC;QACJ,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,oBAAoB;QACzB,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;CACF;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB,gBAAgB;IAChB,cAAc;IACd,iBAAiB;IACjB,gBAAgB;CACR,CAAC;AAEX;;GAEG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG;IAC5B,yCAAyC;IACzC,2CAA2C;IAC3C,wCAAwC;IACxC,4CAA4C;CACpC,CAAC;AAEX;;;GAGG;AACH,MAAM,UAAU,qBAAqB;IACnC,MAAM,YAAY,GAChB,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC;IAC9D,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC;AAC/C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW,EAAE,GAAW;IACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;AAC3D,CAAC"}
@@ -0,0 +1,13 @@
1
+ import { Browser } from "playwright";
2
+ /**
3
+ * Manages a pool of idle Playwright browser instances so they can be reused instead of relaunched
4
+ */
5
+ export declare class BrowserPool {
6
+ private pool; // idle browsers available for reuse
7
+ private maxSize; // upper bound on idle browsers kept in the pool
8
+ constructor(maxSize?: number); // the compiled implementation defaults maxSize to 3
9
+ acquire(): Promise<Browser>; // resolves to an idle pooled browser, or a newly launched one when the pool is empty
10
+ release(browser: Browser): Promise<void>; // returns the browser to the pool, or closes it when the pool is already full
11
+ cleanup(): Promise<void>; // closes all idle pooled browsers and empties the pool
12
+ }
13
+ export declare const browserPool: BrowserPool; // shared pool instance exported by the module
@@ -0,0 +1,37 @@
1
+ import { chromium } from "playwright";
2
+ import { SecureBrowserConfig } from "./browser-config.js";
3
+ /**
4
+ * Keeps up to maxSize (default 3) idle Playwright Chromium browsers for reuse; launches a new headless one when none is idle
5
+ */
6
+ export class BrowserPool {
7
+ constructor(maxSize = 3) { // maxSize caps how many idle browsers are retained, not how many may be launched
8
+ this.pool = [];
9
+ this.maxSize = maxSize;
10
+ }
11
+ async acquire() {
12
+ if (this.pool.length > 0) { // reuse the most recently released browser (LIFO)
13
+ return this.pool.pop(); // NOTE(review): returned as-is; nothing here checks that the browser is still connected
14
+ }
15
+ return await chromium.launch({ // pool empty: start a new headless Chromium
16
+ headless: true,
17
+ args: SecureBrowserConfig.getArgs(),
18
+ ignoreDefaultArgs: ["--enable-automation"], // asks Playwright to leave out its --enable-automation default argument
19
+ });
20
+ }
21
+ async release(browser) {
22
+ if (this.pool.length < this.maxSize) {
23
+ // NOTE(review): no state reset actually happens here; the browser is pooled exactly as it was handed back
24
+ this.pool.push(browser);
25
+ }
26
+ else { // pool already holds maxSize idle browsers: close this one instead of keeping it
27
+ await browser.close();
28
+ }
29
+ }
30
+ async cleanup() {
31
+ await Promise.all(this.pool.map(browser => browser.close().catch(() => { }))); // close every idle browser in parallel; individual close() failures are deliberately ignored
32
+ this.pool = []; // only idle browsers are tracked, so browsers currently acquired are not closed here
33
+ }
34
+ }
35
+ // Module-level shared instance, created at import time with the default maxSize of 3
36
+ export const browserPool = new BrowserPool();
37
+ //# sourceMappingURL=browser-pool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"browser-pool.js","sourceRoot":"","sources":["../src/browser-pool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAW,MAAM,YAAY,CAAC;AAC/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAE1D;;GAEG;AACH,MAAM,OAAO,WAAW;IAItB,YAAY,UAAkB,CAAC;QAHvB,SAAI,GAAc,EAAE,CAAC;QAI3B,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAG,CAAC;QAC1B,CAAC;QACD,OAAO,MAAM,QAAQ,CAAC,MAAM,CAAC;YAC3B,QAAQ,EAAE,IAAI;YACd,IAAI,EAAE,mBAAmB,CAAC,OAAO,EAAE;YACnC,iBAAiB,EAAE,CAAC,qBAAqB,CAAC;SAC3C,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,OAAgB;QAC5B,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;YACpC,+CAA+C;YAC/C,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC1B,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,OAAO;QACX,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7E,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC;IACjB,CAAC;CACF;AAED,qBAAqB;AACrB,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC"}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Simple LRU cache implementation for search results
3
+ * Reduces redundant searches and improves performance
4
+ */
5
+ import { SearchResponse } from "./types.js";
6
+ export declare class SearchCache {
7
+ private cache; // backing store for cached entries (structure defined in cache.js, not visible here)
8
+ private maxSize; // presumably the maximum number of entries kept — confirm in cache.js
9
+ private ttl; // default time-to-live — presumably milliseconds; confirm in cache.js
10
+ private totalHits; // presumably backs getStats().hits — confirm in cache.js
11
+ private totalMisses; // presumably backs getStats().misses — confirm in cache.js
12
+ constructor(maxSize?: number, ttl?: number); // both optional; the defaults are set in cache.js
13
+ /**
14
+ * Generate cache key from query and options (private helper; the key format is defined in cache.js)
15
+ */
16
+ private generateKey;
17
+ /**
18
+ * Get the cached SearchResponse for a query, or null when no valid entry exists (how the optional limit and ttl are used is defined in cache.js — TODO confirm)
19
+ */
20
+ get(query: string, limit?: number, ttl?: number): SearchResponse | null;
21
+ /**
22
+ * Store a SearchResponse in the cache for a query (optional limit and ttl presumably mirror those accepted by get — TODO confirm)
23
+ */
24
+ set(query: string, data: SearchResponse, limit?: number, ttl?: number): void;
25
+ /**
26
+ * Clear all cache entries (NOTE(review): whether the hit/miss counters are also reset is not visible here)
27
+ */
28
+ clear(): void;
29
+ /**
30
+ * Remove expired entries (expiry rule is defined in cache.js; presumably based on the ttl)
31
+ */
32
+ cleanup(): void;
33
+ /**
34
+ * Get cache statistics: overall size, limits and counters, plus one record per cached entry
35
+ */
36
+ getStats(): {
37
+ size: number; // presumably the current number of entries — confirm in cache.js
38
+ maxSize: number;
39
+ ttl: number;
40
+ hits: number;
41
+ misses: number;
42
+ entries: Array<{
43
+ key: string;
44
+ age: number; // unit not visible here — presumably milliseconds since the entry was stored; confirm in cache.js
45
+ hits: number; // per-entry hit count, as opposed to the cache-wide hits above
46
+ }>;
47
+ };
48
+ }