crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,96 @@
1
+ import { GoogleSearchAdapter } from './googleSearch.js';
2
+ import { DuckDuckGoSearchAdapter } from './duckduckgoSearch.js';
3
+
4
/**
 * Factory for search-provider adapters, plus static metadata describing
 * what each supported provider can do.
 *
 * Provider names are matched case-insensitively. Values that are not
 * non-empty strings are treated as "unsupported" instead of crashing.
 */
export class SearchProviderFactory {
  /**
   * Convert a caller-supplied provider identifier into its lookup key.
   * @private
   * @param {*} provider - Value supplied by the caller.
   * @returns {string|null} Lower-cased name, or null when `provider` is not a non-empty string.
   */
  static _normalizeProviderName(provider) {
    if (typeof provider !== 'string' || provider.length === 0) {
      return null;
    }
    return provider.toLowerCase();
  }

  /**
   * Create the adapter for the requested provider.
   * @param {string} provider - Provider name ('google' or 'duckduckgo', any letter case).
   * @param {Object} [options] - Per-provider settings: `options.google.{apiKey, searchEngineId}`
   *   and `options.duckduckgo` (passed through to the DuckDuckGo adapter).
   * @returns {Object} A new adapter instance.
   * @throws {Error} When the provider is unsupported or the Google credentials are missing.
   */
  static createAdapter(provider, options = {}) {
    // `options = {}` only covers `undefined`; an explicit null must not crash either.
    const settings = options ?? {};

    switch (SearchProviderFactory._normalizeProviderName(provider)) {
      case 'google': {
        const { apiKey, searchEngineId } = settings.google ?? {};
        if (!apiKey || !searchEngineId) {
          throw new Error('Google Search adapter requires apiKey and searchEngineId');
        }
        return new GoogleSearchAdapter(apiKey, searchEngineId);
      }

      case 'duckduckgo':
        return new DuckDuckGoSearchAdapter(settings.duckduckgo || {});

      default:
        // String() keeps the message safe for symbols and other odd inputs.
        throw new Error(`Unsupported search provider: ${String(provider)}`);
    }
  }

  /**
   * List the provider names understood by {@link SearchProviderFactory.createAdapter}.
   * @returns {Array<string>}
   */
  static getSupportedProviders() {
    return ['google', 'duckduckgo'];
  }

  /**
   * Report whether an adapter can be created for `provider` with `options`.
   * This works by attempting the construction and discarding the result, so
   * any error thrown while creating the adapter counts as "not available".
   * @param {string} provider
   * @param {Object} [options]
   * @returns {boolean}
   */
  static isProviderAvailable(provider, options = {}) {
    try {
      SearchProviderFactory.createAdapter(provider, options);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Describe the capabilities of a provider.
   * @param {string} provider - Provider name (any letter case).
   * @returns {Object|null} A fresh capability record, or null for unknown or invalid names.
   */
  static getProviderCapabilities(provider) {
    const capabilities = {
      google: {
        requiresApiKey: true,
        supportsPagination: true,
        supportsLanguageFilter: true,
        supportsDateFilter: true,
        supportsSiteFilter: true,
        supportsFileTypeFilter: true,
        supportsSafeSearch: true,
        // NOTE(review): Google's Custom Search JSON API documents `num` as 1-10 per
        // request; confirm whether 100 is meant as the total-results cap instead.
        maxResultsPerRequest: 100,
        rateLimit: '100 queries per day (free tier)',
        features: [
          'Web search',
          'Image search',
          'Exact phrase matching',
          'Boolean operators',
          'Site-specific search',
          'File type filtering',
          'Date range filtering',
          'Language filtering',
          'Safe search',
          'Related searches'
        ]
      },
      duckduckgo: {
        requiresApiKey: false,
        supportsPagination: false, // Limited by API
        supportsLanguageFilter: true,
        supportsDateFilter: true,
        supportsSiteFilter: false, // Not directly supported
        supportsFileTypeFilter: false, // Not directly supported
        supportsSafeSearch: true,
        maxResultsPerRequest: 10, // Limited by instant answer API
        rateLimit: 'No explicit limit (be respectful)',
        features: [
          'Privacy-focused search',
          'Instant answers',
          'No tracking',
          'Language filtering',
          'Date filtering',
          'Safe search',
          'Autocomplete suggestions'
        ]
      }
    };

    const name = SearchProviderFactory._normalizeProviderName(provider);
    // Own-property check: names such as 'constructor' or '__proto__' would
    // otherwise resolve to members inherited from Object.prototype.
    if (name === null || !Object.prototype.hasOwnProperty.call(capabilities, name)) {
      return null;
    }
    return capabilities[name];
  }

  /**
   * Build a side-by-side capability listing for every supported provider.
   * @returns {Array<Object>} One record per provider: `{ name, ...capabilities }`.
   */
  static compareProviders() {
    return SearchProviderFactory.getSupportedProviders().map(name => ({
      name,
      ...SearchProviderFactory.getProviderCapabilities(name)
    }));
  }
}
95
+
96
+ export default SearchProviderFactory;
@@ -0,0 +1,543 @@
1
+ import { createHash } from 'crypto';
2
+
3
/**
 * QueryExpander - rule-based query expansion and refinement.
 *
 * Produces alternative formulations of a search query using small built-in
 * dictionaries and heuristics: synonym substitution, correction of known
 * misspellings, suffix stripping ("stemming"), phrase quoting and boolean
 * operator variations. All processing is local and synchronous; only
 * `expandQuery` is declared `async` (it returns a Promise).
 */
export class QueryExpander {
  /**
   * @param {Object} [options]
   * @param {boolean} [options.enableSynonyms=true]
   * @param {boolean} [options.enableSpellCheck=true]
   * @param {boolean} [options.enableStemming=true]
   * @param {boolean} [options.enablePhraseDetection=true]
   * @param {boolean} [options.enableBooleanOperators=true]
   * @param {number} [options.maxExpansions=5] - Upper bound on the size of the `expandQuery` result.
   *   Any additional keys are kept on `this.options` untouched.
   */
  constructor(options = {}) {
    const supplied = options ?? {};
    // A feature is on unless the caller passed an explicit (non-undefined) falsy value.
    const flag = value => (value === undefined ? true : Boolean(value));

    this.options = {
      // Spread first so the normalized values below are not overwritten by
      // raw caller input (e.g. `{ enableSynonyms: undefined }`).
      ...supplied,
      enableSynonyms: flag(supplied.enableSynonyms),
      enableSpellCheck: flag(supplied.enableSpellCheck),
      enableStemming: flag(supplied.enableStemming),
      enablePhraseDetection: flag(supplied.enablePhraseDetection),
      enableBooleanOperators: flag(supplied.enableBooleanOperators),
      maxExpansions: this._resolveLimit(supplied.maxExpansions, 5)
    };

    // Initialize dictionaries
    this.synonymDict = this._initializeSynonyms();
    this.spellingDict = this._initializeSpellingCorrections();
    this.stopWords = this._initializeStopWords();
  }

  /**
   * Main expansion method - runs every enabled strategy and merges the results.
   *
   * The trimmed original query is always the first entry. Strategies run in a
   * fixed order (synonyms, spelling, stemming, phrases, boolean operators) and
   * the merged, de-duplicated list is cut to `maxExpansions`, so later
   * strategies only appear when earlier ones leave room.
   *
   * @param {string} query - The original search query
   * @param {Object} [options] - Per-call overrides of the constructor options;
   *   keys whose value is `undefined` are ignored.
   * @returns {Promise<Array<string>>} Expanded query variations (empty for a whitespace-only query)
   * @throws {Error} When `query` is not a non-empty string
   */
  async expandQuery(query, options = {}) {
    if (!query || typeof query !== 'string') {
      throw new Error('Query must be a non-empty string');
    }

    const trimmed = query.trim();

    const settings = { ...this.options };
    for (const [key, value] of Object.entries(options ?? {})) {
      if (value !== undefined) {
        settings[key] = value;
      }
    }
    const limit = this._resolveLimit(
      settings.maxExpansions,
      this._resolveLimit(this.options.maxExpansions, 5)
    );

    // [option flag, strategy] pairs in priority order.
    const strategies = [
      ['enableSynonyms', text => this.expandWithSynonyms(text)],
      ['enableSpellCheck', text => this.correctSpelling(text)],
      ['enableStemming', text => this.expandWithStemming(text)],
      ['enablePhraseDetection', text => this.expandWithPhrases(text)],
      ['enableBooleanOperators', text => this.expandWithBooleanOperators(text)]
    ];

    try {
      // Always include the original query
      const expansions = new Set([trimmed]);

      for (const [flagName, run] of strategies) {
        if (!settings[flagName]) {
          continue;
        }
        for (const expansion of run(trimmed)) {
          expansions.add(expansion);
        }
      }

      return [...expansions]
        .filter(expansion => expansion.length > 0)
        .slice(0, limit);
    } catch (error) {
      // Best effort: expansion problems must never block the search itself.
      console.error('Query expansion error:', error);
      return trimmed.length > 0 ? [trimmed] : [];
    }
  }

  /**
   * Expand query with synonyms: each word that has dictionary synonyms is
   * replaced, one position at a time, by each of its synonyms.
   * @param {string} query
   * @returns {Array<string>} Unique, lower-cased variations; never the normalized query itself
   */
  expandWithSynonyms(query) {
    const words = this._tokenize(query);
    const collector = this._createCollector(words.join(' '));

    // Replace individual words with synonyms
    words.forEach((word, index) => {
      for (const synonym of this._synonymsFor(word)) {
        const variant = [...words];
        variant[index] = synonym;
        collector.add(variant.join(' '));
      }
    });

    // Combination pass over [word, up to two synonyms] groups.
    const synonymGroups = words.map(word => [word, ...this._synonymsFor(word).slice(0, 2)]);
    if (synonymGroups.some(group => group.length > 1)) {
      for (const combo of this._generateCombinations(synonymGroups, 3)) {
        collector.add(combo.join(' '));
      }
    }

    return collector.items;
  }

  /**
   * Correct common misspellings found in the built-in correction table.
   * @param {string} query
   * @returns {Array<string>} A single corrected query, or an empty array when nothing changed
   */
  correctSpelling(query) {
    const words = this._tokenize(query);
    let hasCorrections = false;

    const correctedWords = words.map(word => {
      const key = word.toLowerCase();
      const correction = this._lookup(this.spellingDict, key);
      if (typeof correction === 'string' && correction !== key) {
        hasCorrections = true;
        return correction;
      }
      return word;
    });

    return hasCorrections ? [correctedWords.join(' ')] : [];
  }

  /**
   * Apply word stemming for broader matches.
   * @param {string} query
   * @returns {Array<string>} The fully stemmed query followed by variants with a
   *   single word stemmed; empty when stemming changes nothing
   */
  expandWithStemming(query) {
    const words = this._tokenize(query);
    const stemmedWords = words.map(word => this._applyStemming(word));

    // Compare against the normalized (lower-cased) query so that a mere
    // change of letter case is not reported as a stemming result.
    const collector = this._createCollector(words.join(' '));
    collector.add(stemmedWords.join(' '));

    // Create variations with stemmed and original words
    for (const variation of this._createStemMixVariations(words, stemmedWords)) {
      collector.add(variation);
    }

    return collector.items;
  }

  /**
   * Detect and handle phrases: quote the whole query, quote runs of
   * capitalized words, and split long queries into 2- and 3-word sub-phrases.
   * @param {string} query
   * @returns {Array<string>}
   */
  expandWithPhrases(query) {
    const text = query.trim();
    const parts = text.split(/\s+/).filter(part => part.length > 0);
    const collector = this._createCollector(text);

    // Add quoted phrases if not already present
    if (!text.includes('"') && parts.length > 1) {
      collector.add(`"${text}"`);
    }

    // Detect potential phrases (consecutive capitalized words)
    for (const phrase of this._detectPhrases(text)) {
      if (phrase.length > text.length * 0.5) { // Only for substantial phrases
        collector.add(`"${phrase}"`);
      }
    }

    // Break down complex queries into sub-phrases
    if (parts.length > 3) {
      for (const phrase of this._generateSubPhrases(text)) {
        collector.add(phrase);
      }
    }

    return collector.items;
  }

  /**
   * Add boolean operator variations built from the non-stop-word terms.
   * @param {string} query
   * @returns {Array<string>}
   */
  expandWithBooleanOperators(query) {
    const terms = this._tokenize(query).filter(word => !this.stopWords.has(word.toLowerCase()));
    if (terms.length < 2) {
      return [];
    }

    const expansions = [
      terms.join(' AND '), // AND variations (explicit)
      terms.join(' OR ')   // OR variations
    ];

    if (terms.length > 2) {
      // Mixed AND/OR for longer queries: first two terms are treated as primary.
      const primaryTerms = terms.slice(0, 2);
      const secondaryTerms = terms.slice(2);
      expansions.push(`(${primaryTerms.join(' AND ')}) OR (${secondaryTerms.join(' OR ')})`);
    } else {
      // Proximity searches (if supported by search engine)
      expansions.push(`"${terms.join(' ')}"~5`); // Within 5 words
    }

    return expansions;
  }

  /**
   * Generate query suggestions based on common patterns.
   *
   * Context-specific suggestions (product / technical) come first because the
   * list is capped at eight entries; the remaining slots are filled with the
   * generic patterns.
   *
   * @param {string} query
   * @returns {Array<string>} At most eight unique suggestions
   */
  generateSuggestions(query) {
    const contextual = [];

    // Add contextual suggestions based on query type
    if (this._isProductQuery(query)) {
      contextual.push(
        `${query} price`,
        `${query} review`,
        `buy ${query}`,
        `${query} comparison`
      );
    }

    if (this._isTechnicalQuery(query)) {
      contextual.push(
        `${query} API`,
        `${query} configuration`,
        `${query} troubleshooting`,
        `${query} installation`
      );
    }

    // Common search patterns
    const generic = [
      `${query} tutorial`,
      `${query} guide`,
      `${query} examples`,
      `${query} best practices`,
      `${query} documentation`,
      `how to ${query}`,
      `${query} vs alternatives`,
      `${query} tips`,
      `learn ${query}`,
      `${query} review`
    ];

    // The Set removes the duplicate "review" entry while keeping order.
    return [...new Set([...contextual, ...generic])].slice(0, 8); // Limit suggestions
  }

  /**
   * Initialize synonym dictionary
   * @private
   */
  _initializeSynonyms() {
    return {
      // Technology synonyms
      'web': ['website', 'internet', 'online'],
      'website': ['web', 'site', 'portal'],
      'app': ['application', 'software', 'program'],
      'mobile': ['smartphone', 'phone', 'cellular'],
      'computer': ['pc', 'desktop', 'machine'],
      'software': ['program', 'app', 'application'],
      'development': ['programming', 'coding', 'dev'],
      'programming': ['coding', 'development', 'dev'],
      'database': ['db', 'data storage', 'repository'],
      'server': ['host', 'backend', 'service'],
      'api': ['interface', 'endpoint', 'service'],

      // Business synonyms
      'company': ['business', 'organization', 'firm', 'corporation'],
      'business': ['company', 'enterprise', 'firm'],
      'customer': ['client', 'user', 'consumer'],
      'product': ['item', 'good', 'service'],
      'service': ['offering', 'solution', 'product'],

      // General synonyms
      'big': ['large', 'huge', 'massive'],
      'small': ['little', 'tiny', 'mini'],
      'fast': ['quick', 'rapid', 'speedy'],
      'slow': ['sluggish', 'gradual', 'delayed'],
      'good': ['excellent', 'great', 'quality'],
      'bad': ['poor', 'terrible', 'awful'],
      'new': ['recent', 'latest', 'modern'],
      'old': ['vintage', 'classic', 'legacy'],
      'free': ['gratis', 'complimentary', 'no cost'],
      'cheap': ['affordable', 'budget', 'economical'],
      'expensive': ['costly', 'premium', 'high-end']
    };
  }

  /**
   * Initialize common misspelling corrections
   * @private
   */
  _initializeSpellingCorrections() {
    return {
      // Common technology misspellings
      'javascirpt': 'javascript',
      'javscript': 'javascript',
      'phyton': 'python',
      'pyhton': 'python',
      'databse': 'database',
      'developement': 'development',
      'progamming': 'programming',
      'algoritm': 'algorithm',
      'machien': 'machine',
      'compuer': 'computer',
      'webiste': 'website',
      'appliation': 'application',
      'sofware': 'software',
      'programing': 'programming',

      // Common word misspellings
      'recieve': 'receive',
      'seperate': 'separate',
      'definately': 'definitely',
      'occured': 'occurred',
      'begining': 'beginning',
      'achievment': 'achievement',
      'beleive': 'believe',
      'freind': 'friend',
      'wierd': 'weird',
      'speach': 'speech',
      'realy': 'really',
      'alot': 'a lot',
      'wich': 'which',
      'thier': 'their',
      'lenght': 'length'
    };
  }

  /**
   * Initialize stop words
   * @private
   */
  _initializeStopWords() {
    return new Set([
      'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
      'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the',
      'to', 'was', 'were', 'will', 'with', 'would', 'could', 'should',
      'this', 'these', 'they', 'them', 'their', 'there', 'then', 'than'
    ]);
  }

  /**
   * Normalize a `maxExpansions`-style value.
   * @private
   * @param {*} value - Candidate limit.
   * @param {number} [fallback=5] - Used when `value` is missing, negative or not numeric.
   * @returns {number} A non-negative integer (or Infinity)
   */
  _resolveLimit(value, fallback = 5) {
    if (value === undefined || value === null || value === '') {
      return fallback;
    }
    const numeric = Number(value);
    return Number.isNaN(numeric) || numeric < 0 ? fallback : Math.floor(numeric);
  }

  /**
   * Read an entry from a plain-object dictionary, ignoring inherited members
   * so that words such as "constructor" are not mistaken for entries.
   * @private
   * @param {Object} dict
   * @param {string} key
   * @returns {*} The stored value, or undefined when `key` is not an own property
   */
  _lookup(dict, key) {
    return dict != null && Object.prototype.hasOwnProperty.call(dict, key) ? dict[key] : undefined;
  }

  /**
   * Synonyms registered for a word (case-insensitive).
   * @private
   * @param {string} word
   * @returns {Array<string>} Possibly empty list
   */
  _synonymsFor(word) {
    const entry = this._lookup(this.synonymDict, word.toLowerCase());
    return Array.isArray(entry) ? entry : [];
  }

  /**
   * Create an order-preserving accumulator that drops duplicates, empty
   * strings and the given baseline value.
   * @private
   * @param {string} baseline - Value that must never be collected (the query itself).
   * @returns {{add: function(string): void, items: Array<string>}}
   */
  _createCollector(baseline) {
    const seen = new Set([baseline]);
    const items = [];
    return {
      items,
      add(candidate) {
        if (candidate.length > 0 && !seen.has(candidate)) {
          seen.add(candidate);
          items.push(candidate);
        }
      }
    };
  }

  /**
   * Tokenize query into lower-cased words. Letters and digits of any script
   * are kept (plus `_`); every other character acts as a separator.
   * @private
   * @param {string} query
   * @returns {Array<string>}
   */
  _tokenize(query) {
    return query.toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
      .split(/\s+/)
      .filter(word => word.length > 0);
  }

  /**
   * Apply simple suffix-stripping rules.
   *
   * The first rule that yields an acceptable stem wins. A stem is rejected
   * when it is shorter than three characters or contains no vowel, which
   * keeps words like "king", "test" or "string" intact. Words ending in
   * "ss", "us" or "is" keep their final "s".
   *
   * @private
   * @param {string} word
   * @returns {string} Lower-cased stem, or the lower-cased word when no rule applies
   */
  _applyStemming(word) {
    const MIN_STEM_LENGTH = 3;
    const stemRules = [
      { suffix: 'ies', replacement: 'y', minLength: 4, undouble: false },
      { suffix: 'ied', replacement: 'y', minLength: 4, undouble: false },
      { suffix: 'ing', replacement: '', minLength: 4, undouble: true },
      { suffix: 'ed', replacement: '', minLength: 3, undouble: true },
      { suffix: 's', replacement: '', minLength: 3, undouble: false },
      { suffix: 'ly', replacement: '', minLength: 4, undouble: false },
      { suffix: 'er', replacement: '', minLength: 3, undouble: true },
      { suffix: 'est', replacement: '', minLength: 4, undouble: true }
    ];

    const lowerWord = word.toLowerCase();

    for (const rule of stemRules) {
      if (lowerWord.length < rule.minLength || !lowerWord.endsWith(rule.suffix)) {
        continue;
      }
      if (rule.suffix === 's' && /(?:ss|us|is)$/.test(lowerWord)) {
        continue; // "class", "status", "analysis" are not plurals
      }

      let base = lowerWord.slice(0, -rule.suffix.length);
      // "running" -> "runn" -> "run": drop a doubled final consonant
      // (l, s and z are left alone, as in "fall", "miss", "buzz").
      if (rule.undouble && base.length > MIN_STEM_LENGTH && /([bcdfghjkmnpqrtvwx])\1$/.test(base)) {
        base = base.slice(0, -1);
      }

      const stem = base + rule.replacement;
      if (stem.length >= MIN_STEM_LENGTH && /[aeiouy]/.test(stem)) {
        return stem;
      }
    }

    return lowerWord;
  }

  /**
   * Generate combinations from synonym groups. Each combination swaps exactly
   * one position (among the first three) for that group's first alternative
   * and keeps the head of every other group.
   * @private
   * @param {Array<Array<string>>} groups - One `[word, ...synonyms]` list per query word
   * @param {number} maxCombinations
   * @returns {Array<Array<string>>}
   */
  _generateCombinations(groups, maxCombinations) {
    const maxGroups = Math.min(groups.length, 3); // Limit complexity
    const count = Math.max(0, Math.min(maxCombinations, maxGroups));

    return Array.from({ length: count }, (_, position) =>
      groups.map((group, index) =>
        (index === position && group.length > 1 ? group[1] : group[0])
      )
    );
  }

  /**
   * Create mixed variations of stemmed and original words: one variation per
   * word whose stem differs, with only that word replaced.
   * @private
   * @param {Array<string>} originalWords
   * @param {Array<string>} stemmedWords
   * @returns {Array<string>}
   */
  _createStemMixVariations(originalWords, stemmedWords) {
    const variations = [];

    originalWords.forEach((word, index) => {
      if (word !== stemmedWords[index]) {
        const mixed = [...originalWords];
        mixed[index] = stemmedWords[index];
        variations.push(mixed.join(' '));
      }
    });

    return variations;
  }

  /**
   * Detect potential phrases in query: runs of two or more consecutive words
   * that start with an upper-case letter.
   * @private
   * @param {string} query
   * @returns {Array<string>}
   */
  _detectPhrases(query) {
    // Digits and punctuation equal their own upper-case form, so also require
    // that the first character differs from its lower-case form.
    const isCapitalized = word => {
      const first = word.charAt(0);
      return word.length > 1 && first === first.toUpperCase() && first !== first.toLowerCase();
    };

    const phrases = [];
    let currentPhrase = [];
    const flush = () => {
      if (currentPhrase.length > 1) {
        phrases.push(currentPhrase.join(' '));
      }
      currentPhrase = [];
    };

    for (const word of query.split(/\s+/)) {
      if (isCapitalized(word)) {
        currentPhrase.push(word);
      } else {
        flush();
      }
    }
    flush();

    return phrases;
  }

  /**
   * Generate sub-phrases from complex queries
   * @private
   * @param {string} query
   * @returns {Array<string>} Every 2-word and 3-word window over the tokens, in order
   */
  _generateSubPhrases(query) {
    const words = this._tokenize(query);
    const phrases = [];

    for (let i = 0; i < words.length - 1; i++) {
      // 2-word phrases
      phrases.push(`${words[i]} ${words[i + 1]}`);

      // 3-word phrases
      if (i < words.length - 2) {
        phrases.push(`${words[i]} ${words[i + 1]} ${words[i + 2]}`);
      }
    }

    return phrases;
  }

  /**
   * Check whether any query word starts with one of the given keywords.
   * Matching on word prefixes keeps derived forms ("apps", "website",
   * "shopping") while avoiding hits in the middle of unrelated words
   * ("rapid" for "api", "restore" for "store").
   * @private
   * @param {string} query
   * @param {Array<string>} keywords
   * @returns {boolean}
   */
  _matchesKeyword(query, keywords) {
    const words = this._tokenize(query);
    return keywords.some(keyword => words.some(word => word.startsWith(keyword)));
  }

  /**
   * Check if query is product-related
   * @private
   */
  _isProductQuery(query) {
    const productKeywords = ['buy', 'purchase', 'price', 'cost', 'shop', 'store', 'product', 'item'];
    return this._matchesKeyword(query, productKeywords);
  }

  /**
   * Check if query is technical
   * @private
   */
  _isTechnicalQuery(query) {
    const techKeywords = ['api', 'code', 'programming', 'development', 'software', 'app', 'web', 'database', 'server'];
    return this._matchesKeyword(query, techKeywords);
  }

  /**
   * Get statistics about expansion capabilities
   * @returns {Object} Dictionary sizes and the enabled-feature flags
   */
  getStats() {
    return {
      synonymCount: Object.keys(this.synonymDict).length,
      spellingCorrectionCount: Object.keys(this.spellingDict).length,
      stopWordCount: this.stopWords.size,
      capabilities: {
        synonyms: this.options.enableSynonyms,
        spellCheck: this.options.enableSpellCheck,
        stemming: this.options.enableStemming,
        phrases: this.options.enablePhraseDetection,
        booleanOperators: this.options.enableBooleanOperators
      }
    };
  }
}
+
543
+ export default QueryExpander;