crawlforge-mcp-server 3.0.7 → 3.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,236 +1,131 @@
1
- import { customsearch } from '@googleapis/customsearch';
2
- import { RetryManager } from '../../../utils/RetryManager.js';
3
- import { createCircuitBreaker } from '../../../utils/CircuitBreaker.js';
4
- import { logger } from '../../../utils/Logger.js';
1
+ /**
2
+ * Google Custom Search API Adapter
3
+ *
4
+ * Direct integration with Google Custom Search API.
5
+ * Used by Creator Mode when GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID are configured.
6
+ *
7
+ * Requirements:
8
+ * - GOOGLE_API_KEY: Your Google Cloud API key
9
+ * - GOOGLE_SEARCH_ENGINE_ID: Your Custom Search Engine ID (cx)
10
+ *
11
+ * Get credentials at:
12
+ * - API Key: https://console.cloud.google.com/apis/credentials
13
+ * - Search Engine ID: https://programmablesearchengine.google.com/
14
+ */
5
15
 
6
16
  export class GoogleSearchAdapter {
7
- constructor(apiKey, searchEngineId, options = {}) {
8
- if (!apiKey || !searchEngineId) {
9
- throw new Error('Google API key and Search Engine ID are required');
17
+ constructor(apiKey, searchEngineId) {
18
+ if (!apiKey) {
19
+ throw new Error('Google API key is required');
20
+ }
21
+ if (!searchEngineId) {
22
+ throw new Error('Google Search Engine ID (cx) is required');
10
23
  }
11
24
 
12
25
  this.apiKey = apiKey;
13
26
  this.searchEngineId = searchEngineId;
14
- this.customsearch = customsearch('v1');
15
-
16
- // Initialize error handling components
17
- this.retryManager = options.retryManager || RetryManager.createPreset('api');
18
- this.circuitBreaker = options.circuitBreaker || createCircuitBreaker('api');
19
- this.logger = logger.child({ component: 'GoogleSearchAdapter' });
20
-
21
- // Service identifier for circuit breaker
22
- this.serviceId = 'google-search-api';
27
+ this.baseUrl = 'https://www.googleapis.com/customsearch/v1';
23
28
  }
24
29
 
30
+ /**
31
+ * Perform a web search via Google Custom Search API
32
+ * @param {Object} params - Search parameters
33
+ * @param {string} params.query - Search query
34
+ * @param {number} params.num - Number of results (1-10 per request)
35
+ * @param {number} params.start - Starting position (1-based)
36
+ * @param {string} params.lr - Language restriction (e.g., 'lang_en')
37
+ * @param {string} params.safe - Safe search setting ('active' or 'off')
38
+ * @param {string} params.dateRestrict - Date restriction (e.g., 'd1', 'w1', 'm1', 'y1')
39
+ * @param {string} params.cr - Country restriction
40
+ * @param {string} params.siteSearch - Restrict to specific site
41
+ * @param {string} params.fileType - Restrict to file type
42
+ * @returns {Promise<Object>} Search results
43
+ */
25
44
  async search(params) {
26
- const requestId = this.logger.startRequest({
27
- operation: 'search',
28
- query: params.query,
29
- parameters: { ...params, query: '[REDACTED]' } // Don't log sensitive query data
30
- });
31
-
32
45
  try {
33
- // Simplified execution without circuit breaker for now
34
- const executeSearch = async () => {
35
- this.logger.debug('Executing Google search API call', { params }, requestId);
36
-
37
- const response = await this.customsearch.cse.list({
38
- auth: this.apiKey,
39
- cx: this.searchEngineId,
40
- q: params.query,
41
- num: params.num || 10,
42
- start: params.start || 1,
43
- lr: params.lr,
44
- safe: params.safe,
45
- dateRestrict: params.dateRestrict,
46
- siteSearch: params.siteSearch,
47
- siteSearchFilter: params.siteSearchFilter,
48
- fileType: params.fileType,
49
- rights: params.rights,
50
- imgSize: params.imgSize,
51
- imgType: params.imgType,
52
- imgColorType: params.imgColorType,
53
- imgDominantColor: params.imgDominantColor
54
- });
55
-
56
- this.logger.info('Google search API call successful', {
57
- resultsCount: response.data?.items?.length || 0,
58
- searchTime: response.data?.searchInformation?.searchTime
59
- }, requestId);
60
-
61
- return response.data;
62
- };
63
-
64
- // Try to use retry manager if available, otherwise execute directly
65
- let result;
66
- try {
67
- result = await this.retryManager.execute(executeSearch, { operation: 'search', query: params.query });
68
- } catch (retryError) {
69
- // If retry manager fails, try direct execution
70
- this.logger.warn('Retry manager failed, executing directly', { error: retryError.message }, requestId);
71
- result = await executeSearch();
72
- }
73
-
74
- this.logger.endRequest(requestId, {
75
- success: true,
76
- resultsCount: result?.items?.length || 0
77
- });
78
-
79
- return result;
80
- } catch (error) {
81
- this.logger.requestError(requestId, error, {
82
- operation: 'search',
83
- query: params.query
46
+ // Build query parameters
47
+ const searchParams = new URLSearchParams({
48
+ key: this.apiKey,
49
+ cx: this.searchEngineId,
50
+ q: params.query,
51
+ num: Math.min(params.num || 10, 10), // Google API max is 10 per request
52
+ start: params.start || 1,
84
53
  });
85
54
 
86
- // Enhanced error handling with detailed logging
87
- if (error.response) {
88
- const status = error.response.status;
89
- const message = error.response.data?.error?.message || error.message;
90
-
91
- this.logger.warn('Google Search API error response', {
92
- status,
93
- message,
94
- query: params.query
95
- }, requestId);
96
-
97
- if (status === 429) {
98
- throw new Error('API rate limit exceeded. Please try again later.');
99
- } else if (status === 403) {
100
- throw new Error('API access forbidden. Check your API key and permissions.');
101
- } else if (status === 400) {
102
- throw new Error(`Invalid search parameters: ${message}`);
103
- } else if (status >= 500) {
104
- throw new Error(`Google Search API server error (${status}): ${message}`);
105
- }
55
+ // Add optional parameters
56
+ if (params.lr) {
57
+ searchParams.set('lr', params.lr);
106
58
  }
107
-
108
- throw new Error(`Google Search API error: ${error.message}`);
109
- }
110
- }
111
-
112
- async getSuggestions(query) {
113
- // Google doesn't provide suggestions through the Custom Search API
114
- // This could be implemented with a separate API or service
115
- return [];
116
- }
117
-
118
- async getRelatedSearches(query) {
119
- try {
120
- // Perform a search and extract related searches from the response
121
- const response = await this.search({
122
- query,
123
- num: 1
124
- });
125
-
126
- if (response.queries && response.queries.related) {
127
- return response.queries.related.map(r => r.searchTerms);
59
+ if (params.safe) {
60
+ searchParams.set('safe', params.safe);
61
+ }
62
+ if (params.dateRestrict) {
63
+ searchParams.set('dateRestrict', params.dateRestrict);
64
+ }
65
+ if (params.cr) {
66
+ searchParams.set('cr', params.cr);
67
+ }
68
+ if (params.siteSearch) {
69
+ searchParams.set('siteSearch', params.siteSearch);
70
+ }
71
+ if (params.fileType) {
72
+ searchParams.set('fileType', params.fileType);
128
73
  }
129
74
 
130
- return [];
131
- } catch {
132
- return [];
133
- }
134
- }
135
-
136
- async validateApiKey() {
137
- const requestId = this.logger.startRequest({ operation: 'validateApiKey' });
138
-
139
- try {
140
- await this.search({
141
- query: 'test',
142
- num: 1
75
+ const response = await fetch(`${this.baseUrl}?${searchParams.toString()}`, {
76
+ method: 'GET',
77
+ headers: {
78
+ 'Accept': 'application/json',
79
+ }
143
80
  });
144
-
145
- this.logger.endRequest(requestId, { success: true, valid: true });
146
- return true;
147
- } catch (error) {
148
- this.logger.requestError(requestId, error, { operation: 'validateApiKey' });
149
- return false;
150
- }
151
- }
152
-
153
- /**
154
- * Get error handling statistics
155
- * @returns {Object} Statistics from retry manager and circuit breaker
156
- */
157
- getStats() {
158
- return {
159
- retryStats: this.retryManager.getStats(),
160
- circuitBreakerStats: this.circuitBreaker.getStats(),
161
- loggerStats: this.logger.getStats()
162
- };
163
- }
164
-
165
- /**
166
- * Reset error handling statistics
167
- */
168
- resetStats() {
169
- this.retryManager.resetStats();
170
- this.circuitBreaker.reset(this.serviceId);
171
- }
172
81
 
173
- /**
174
- * Get health status of the service
175
- * @returns {Object} Health status information
176
- */
177
- getHealthStatus() {
178
- const circuitStats = this.circuitBreaker.getServiceMetrics(this.serviceId);
179
- const retryStats = this.retryManager.getStats();
180
-
181
- return {
182
- status: circuitStats.state === 'CLOSED' ? 'healthy' : 'degraded',
183
- circuitState: circuitStats.state,
184
- errorRate: circuitStats.errorRate,
185
- successRate: retryStats.successRate,
186
- lastFailure: circuitStats.lastFailure,
187
- nextAttempt: circuitStats.nextAttempt
188
- };
189
- }
190
- }
191
-
192
- export class MockSearchAdapter {
193
- // Mock adapter for testing without API keys
194
- async search(params) {
195
- return {
196
- kind: 'customsearch#search',
197
- searchInformation: {
198
- searchTime: 0.123,
199
- formattedSearchTime: '0.12',
200
- totalResults: '1000',
201
- formattedTotalResults: '1,000'
202
- },
203
- items: [
204
- {
205
- title: `Mock result for: ${params.query}`,
206
- link: `https://example.com/mock/${params.query.replace(/\s+/g, '-')}`,
207
- displayLink: 'example.com',
208
- snippet: `This is a mock search result for the query "${params.query}". It demonstrates the search functionality without requiring API credentials.`,
209
- htmlSnippet: `This is a mock search result for the query "<b>${params.query}</b>". It demonstrates the search functionality without requiring API credentials.`,
210
- formattedUrl: 'https://example.com/mock',
211
- pagemap: {
212
- metatags: [{
213
- 'og:title': `Mock: ${params.query}`,
214
- 'og:description': 'Mock search result description',
215
- 'og:image': 'https://example.com/image.jpg'
216
- }]
82
+ if (!response.ok) {
83
+ let errorMessage = 'Google Search API request failed';
84
+
85
+ try {
86
+ const errorData = await response.json();
87
+ if (errorData.error) {
88
+ errorMessage = errorData.error.message || errorMessage;
89
+
90
+ // Handle specific error cases
91
+ if (response.status === 400) {
92
+ errorMessage = `Invalid request: ${errorMessage}`;
93
+ } else if (response.status === 401) {
94
+ errorMessage = 'Invalid Google API key';
95
+ } else if (response.status === 403) {
96
+ errorMessage = 'Google API access forbidden. Check API key permissions or quota.';
97
+ } else if (response.status === 429) {
98
+ errorMessage = 'Google API quota exceeded. Try again later.';
99
+ }
217
100
  }
101
+ } catch (parseError) {
102
+ errorMessage = `Google Search failed with status ${response.status}: ${response.statusText}`;
218
103
  }
219
- ]
220
- };
221
- }
222
104
 
223
- async getSuggestions(query) {
224
- return [`${query} tutorial`, `${query} examples`, `${query} documentation`];
225
- }
105
+ throw new Error(errorMessage);
106
+ }
226
107
 
227
- async getRelatedSearches(query) {
228
- return [`${query} best practices`, `${query} alternatives`, `how to ${query}`];
229
- }
108
+ const data = await response.json();
109
+
110
+ // Return in standard format
111
+ return {
112
+ items: data.items || [],
113
+ searchInformation: data.searchInformation || {
114
+ totalResults: '0',
115
+ searchTime: 0
116
+ },
117
+ queries: data.queries || {},
118
+ context: data.context || {}
119
+ };
120
+ } catch (error) {
121
+ // Network errors
122
+ if (error.name === 'TypeError' || error.message.includes('fetch')) {
123
+ throw new Error(`Network error connecting to Google Search API: ${error.message}`);
124
+ }
230
125
 
231
- async validateApiKey() {
232
- return true;
126
+ throw error;
127
+ }
233
128
  }
234
129
  }
235
130
 
236
- export default GoogleSearchAdapter;
131
+ export default GoogleSearchAdapter;
@@ -1,96 +1,186 @@
1
+ /**
2
+ * Search Provider Factory
3
+ *
4
+ * Creates search adapter instances.
5
+ * - Production: Uses CrawlForge proxy for Google Search (users with CrawlForge API key)
6
+ * - Creator Mode: Uses Google Search API directly (requires GOOGLE_API_KEY & GOOGLE_SEARCH_ENGINE_ID)
7
+ */
8
+
9
+ import { CrawlForgeSearchAdapter } from './crawlforgeSearch.js';
1
10
  import { GoogleSearchAdapter } from './googleSearch.js';
2
- import { DuckDuckGoSearchAdapter } from './duckduckgoSearch.js';
3
11
 
4
12
  export class SearchProviderFactory {
5
- static createAdapter(provider, options = {}) {
6
- switch (provider.toLowerCase()) {
7
- case 'google':
8
- if (!options.google?.apiKey || !options.google?.searchEngineId) {
9
- throw new Error('Google Search adapter requires apiKey and searchEngineId');
10
- }
11
- return new GoogleSearchAdapter(
12
- options.google.apiKey,
13
- options.google.searchEngineId
13
+ /**
14
+ * Create a search adapter
15
+ * @param {string} apiKey - CrawlForge API key (optional for Creator Mode)
16
+ * @param {Object} options - Configuration options
17
+ * @param {string} options.apiBaseUrl - Custom API base URL (optional)
18
+ * @param {boolean} options.creatorMode - Whether Creator Mode is enabled
19
+ * @param {string} options.googleApiKey - Google API key (for Creator Mode)
20
+ * @param {string} options.googleSearchEngineId - Google Search Engine ID (for Creator Mode)
21
+ * @returns {CrawlForgeSearchAdapter|GoogleSearchAdapter} Search adapter instance
22
+ */
23
+ static createAdapter(apiKey, options = {}) {
24
+ // In Creator Mode without CrawlForge API key, use Google Search API directly
25
+ if (!apiKey && options.creatorMode) {
26
+ const googleApiKey = options.googleApiKey || process.env.GOOGLE_API_KEY;
27
+ const googleSearchEngineId = options.googleSearchEngineId || process.env.GOOGLE_SEARCH_ENGINE_ID;
28
+
29
+ if (!googleApiKey || !googleSearchEngineId) {
30
+ throw new Error(
31
+ 'Creator Mode requires Google Search API credentials. ' +
32
+ 'Set GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment variables.\n' +
33
+ 'Get credentials at:\n' +
34
+ ' - API Key: https://console.cloud.google.com/apis/credentials\n' +
35
+ ' - Search Engine ID: https://programmablesearchengine.google.com/'
14
36
  );
15
-
16
- case 'duckduckgo':
17
- return new DuckDuckGoSearchAdapter(options.duckduckgo || {});
18
-
19
- default:
20
- throw new Error(`Unsupported search provider: ${provider}`);
37
+ }
38
+
39
+ console.log('🔍 Creator Mode: Using Google Search API directly');
40
+ return new GoogleSearchAdapter(googleApiKey, googleSearchEngineId);
41
+ }
42
+
43
+ // Production mode requires CrawlForge API key
44
+ if (!apiKey) {
45
+ throw new Error('CrawlForge API key is required for search functionality');
21
46
  }
47
+
48
+ return new CrawlForgeSearchAdapter(
49
+ apiKey,
50
+ options.apiBaseUrl || 'https://api.crawlforge.dev'
51
+ );
22
52
  }
23
53
 
54
+ /**
55
+ * Get supported providers
56
+ * @returns {Array<string>} List of supported providers
57
+ */
24
58
  static getSupportedProviders() {
25
- return ['google', 'duckduckgo'];
59
+ return ['crawlforge', 'google'];
26
60
  }
27
61
 
28
- static isProviderAvailable(provider, options = {}) {
29
- try {
30
- SearchProviderFactory.createAdapter(provider, options);
31
- return true;
32
- } catch {
33
- return false;
62
+ /**
63
+ * Check if provider is available
64
+ * @param {string} apiKey - CrawlForge API key
65
+ * @param {Object} options - Configuration options
66
+ * @returns {boolean} True if a provider is available
67
+ */
68
+ static isProviderAvailable(apiKey, options = {}) {
69
+ // CrawlForge API key available
70
+ if (apiKey) return true;
71
+
72
+ // Creator Mode with Google credentials
73
+ if (options.creatorMode) {
74
+ const googleApiKey = options.googleApiKey || process.env.GOOGLE_API_KEY;
75
+ const googleSearchEngineId = options.googleSearchEngineId || process.env.GOOGLE_SEARCH_ENGINE_ID;
76
+ return !!(googleApiKey && googleSearchEngineId);
34
77
  }
78
+
79
+ return false;
35
80
  }
36
81
 
37
- static getProviderCapabilities(provider) {
38
- const capabilities = {
39
- google: {
82
+ /**
83
+ * Get provider capabilities
84
+ * @param {string} provider - Provider name ('crawlforge' or 'google')
85
+ * @returns {Object} Provider capabilities
86
+ */
87
+ static getProviderCapabilities(provider = 'crawlforge') {
88
+ if (provider === 'google') {
89
+ return {
40
90
  requiresApiKey: true,
91
+ apiKeyType: 'Google API key + Search Engine ID',
41
92
  supportsPagination: true,
42
93
  supportsLanguageFilter: true,
43
94
  supportsDateFilter: true,
44
95
  supportsSiteFilter: true,
45
96
  supportsFileTypeFilter: true,
46
97
  supportsSafeSearch: true,
47
- maxResultsPerRequest: 100,
48
- rateLimit: '100 queries per day (free tier)',
98
+ supportsLocalization: true,
99
+ supportsCountryTargeting: true,
100
+ maxResultsPerRequest: 10,
101
+ rateLimit: 'Based on Google API quota',
102
+ creditCost: 'N/A (direct API)',
49
103
  features: [
50
- 'Web search',
51
- 'Image search',
104
+ 'Google Custom Search API',
105
+ 'Full text search',
106
+ 'Image metadata',
52
107
  'Exact phrase matching',
53
108
  'Boolean operators',
54
109
  'Site-specific search',
55
110
  'File type filtering',
56
111
  'Date range filtering',
57
112
  'Language filtering',
113
+ 'Country targeting',
58
114
  'Safe search',
59
- 'Related searches'
115
+ 'Rich snippets'
116
+ ],
117
+ benefits: [
118
+ 'Direct Google API access',
119
+ 'No proxy overhead',
120
+ 'Full Google Search capabilities',
121
+ 'Creator Mode only'
60
122
  ]
61
- },
62
- duckduckgo: {
63
- requiresApiKey: false,
64
- supportsPagination: false, // Limited by API
65
- supportsLanguageFilter: true,
66
- supportsDateFilter: true,
67
- supportsSiteFilter: false, // Not directly supported
68
- supportsFileTypeFilter: false, // Not directly supported
69
- supportsSafeSearch: true,
70
- maxResultsPerRequest: 10, // Limited by instant answer API
71
- rateLimit: 'No explicit limit (be respectful)',
72
- features: [
73
- 'Privacy-focused search',
74
- 'Instant answers',
75
- 'No tracking',
76
- 'Language filtering',
77
- 'Date filtering',
78
- 'Safe search',
79
- 'Autocomplete suggestions'
80
- ]
81
- }
82
- };
123
+ };
124
+ }
83
125
 
84
- return capabilities[provider.toLowerCase()] || null;
126
+ return {
127
+ requiresApiKey: true,
128
+ apiKeyType: 'CrawlForge API key',
129
+ supportsPagination: true,
130
+ supportsLanguageFilter: true,
131
+ supportsDateFilter: true,
132
+ supportsSiteFilter: true,
133
+ supportsFileTypeFilter: true,
134
+ supportsSafeSearch: true,
135
+ supportsLocalization: true,
136
+ supportsCountryTargeting: true,
137
+ maxResultsPerRequest: 100,
138
+ rateLimit: 'Based on your CrawlForge plan',
139
+ creditCost: '2 credits per search',
140
+ features: [
141
+ 'Google Search results via CrawlForge proxy',
142
+ 'No Google API credentials needed',
143
+ 'Full text search',
144
+ 'Image metadata',
145
+ 'Exact phrase matching',
146
+ 'Boolean operators',
147
+ 'Site-specific search',
148
+ 'File type filtering',
149
+ 'Date range filtering',
150
+ 'Language filtering',
151
+ 'Country targeting',
152
+ 'Safe search',
153
+ 'Localization support',
154
+ 'Related searches',
155
+ 'Rich snippets'
156
+ ],
157
+ benefits: [
158
+ 'Simplified authentication (one API key)',
159
+ 'Unified billing through CrawlForge',
160
+ 'Enterprise-grade reliability',
161
+ 'No Google API quota management',
162
+ 'Built-in rate limiting',
163
+ 'Credit-based pricing'
164
+ ]
165
+ };
85
166
  }
86
167
 
168
+ /**
169
+ * Compare providers
170
+ * @returns {Array<Object>} Provider comparison
171
+ */
87
172
  static compareProviders() {
88
- const providers = SearchProviderFactory.getSupportedProviders();
89
- return providers.map(provider => ({
90
- name: provider,
91
- ...SearchProviderFactory.getProviderCapabilities(provider)
92
- }));
173
+ return [
174
+ {
175
+ name: 'crawlforge',
176
+ ...SearchProviderFactory.getProviderCapabilities('crawlforge')
177
+ },
178
+ {
179
+ name: 'google',
180
+ ...SearchProviderFactory.getProviderCapabilities('google')
181
+ }
182
+ ];
93
183
  }
94
184
  }
95
185
 
96
- export default SearchProviderFactory;
186
+ export default SearchProviderFactory;