@humbletoes/google-search 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/LICENSE +7 -0
  2. package/README.md +339 -0
  3. package/bin/google-search +3 -0
  4. package/bin/google-search-mcp +3 -0
  5. package/bin/google-search-mcp.cmd +2 -0
  6. package/bin/google-search.cmd +2 -0
  7. package/dist/browser-config.d.ts +41 -0
  8. package/dist/browser-config.js +96 -0
  9. package/dist/browser-config.js.map +1 -0
  10. package/dist/browser-pool.d.ts +13 -0
  11. package/dist/browser-pool.js +37 -0
  12. package/dist/browser-pool.js.map +1 -0
  13. package/dist/cache.d.ts +48 -0
  14. package/dist/cache.js +111 -0
  15. package/dist/cache.js.map +1 -0
  16. package/dist/errors.d.ts +26 -0
  17. package/dist/errors.js +48 -0
  18. package/dist/errors.js.map +1 -0
  19. package/dist/filters.d.ts +48 -0
  20. package/dist/filters.js +192 -0
  21. package/dist/filters.js.map +1 -0
  22. package/dist/html-cleaner.d.ts +62 -0
  23. package/dist/html-cleaner.js +236 -0
  24. package/dist/html-cleaner.js.map +1 -0
  25. package/dist/index.d.ts +2 -0
  26. package/dist/index.js +59 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/logger.d.ts +2 -0
  29. package/dist/logger.js +41 -0
  30. package/dist/logger.js.map +1 -0
  31. package/dist/mcp-server.d.ts +9 -0
  32. package/dist/mcp-server.js +822 -0
  33. package/dist/mcp-server.js.map +1 -0
  34. package/dist/search.d.ts +18 -0
  35. package/dist/search.js +1080 -0
  36. package/dist/search.js.map +1 -0
  37. package/dist/types.d.ts +67 -0
  38. package/dist/types.js +2 -0
  39. package/dist/types.js.map +1 -0
  40. package/dist/validation.d.ts +6 -0
  41. package/dist/validation.js +23 -0
  42. package/dist/validation.js.map +1 -0
  43. package/dist/web-fetcher.d.ts +10 -0
  44. package/dist/web-fetcher.js +179 -0
  45. package/dist/web-fetcher.js.map +1 -0
  46. package/package.json +67 -0
  47. package/scripts/setup.js +53 -0
package/dist/cache.js ADDED
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Simple LRU cache implementation for search results
3
+ * Reduces redundant searches and improves performance
4
+ */
5
+ import logger from "./logger.js";
6
/**
 * In-memory cache for search results with least-recently-used (LRU) eviction
 * and time-based expiry.
 *
 * Entries are kept in a Map whose insertion order doubles as the recency
 * order: the first key is the least recently used entry, the last key the most
 * recently used one. Both a cache hit and a write move the entry to the end.
 */
export class SearchCache {
    /**
     * @param maxSize Number of entries at which a new key triggers an eviction (default 100).
     * @param ttl Default entry lifetime in milliseconds (default 300000).
     */
    constructor(maxSize = 100, ttl = 300000) {
        this.totalHits = 0;
        this.totalMisses = 0;
        this.cache = new Map();
        this.maxSize = maxSize;
        this.ttl = ttl;
    }
    /**
     * Build the cache key: the lower-cased, trimmed query followed by the
     * result limit (a falsy limit is keyed as 10).
     */
    generateKey(query, limit) {
        return `${query.toLowerCase().trim()}:${limit || 10}`;
    }
    /**
     * Look up a cached result.
     *
     * @param query Search query.
     * @param limit Result limit used as part of the key.
     * @param ttl Optional lifetime override in milliseconds; a falsy value
     *            falls back to the instance default.
     * @returns The cached data, or null when the entry is missing or expired.
     */
    get(query, limit, ttl) {
        const key = this.generateKey(query, limit);
        const entry = this.cache.get(key);
        if (!entry) {
            this.totalMisses++;
            logger.debug({ query }, "Cache miss");
            return null;
        }
        // Check if entry is expired
        const age = Date.now() - entry.timestamp;
        const effectiveTtl = ttl || this.ttl;
        if (age > effectiveTtl) {
            this.totalMisses++;
            logger.debug({ query, age, effectiveTtl }, "Cache entry expired");
            this.cache.delete(key);
            return null;
        }
        // Re-insert the entry so it becomes the most recently used one;
        // without this the Map order never changes and eviction is FIFO.
        this.cache.delete(key);
        this.cache.set(key, entry);
        entry.hits++;
        this.totalHits++;
        logger.info({ query, age, hits: entry.hits }, "Cache hit");
        return entry.data;
    }
    /**
     * Store a result, evicting the least recently used entry when the cache
     * is full and the key is new.
     *
     * @param query Search query.
     * @param data Value to cache.
     * @param limit Result limit used as part of the key.
     * @param ttl Accepted for interface compatibility; it is not stored.
     *            Expiry is evaluated in get() and cleanup().
     */
    set(query, data, limit, ttl) {
        const key = this.generateKey(query, limit);
        if (this.cache.has(key)) {
            // Drop the old entry first so the fresh one lands at the
            // most-recently-used end instead of keeping its old position.
            this.cache.delete(key);
        }
        else if (this.cache.size >= this.maxSize) {
            // The first key in insertion order is the least recently used.
            const oldestKey = this.cache.keys().next().value;
            if (oldestKey !== undefined) {
                this.cache.delete(oldestKey);
                logger.debug({ removedKey: oldestKey }, "Cache eviction");
            }
        }
        this.cache.set(key, {
            data,
            timestamp: Date.now(),
            hits: 0,
        });
        logger.debug({ query, cacheSize: this.cache.size }, "Cached search result");
    }
    /**
     * Remove every entry. Hit/miss counters are left untouched.
     */
    clear() {
        const size = this.cache.size;
        this.cache.clear();
        logger.info({ clearedEntries: size }, "Cache cleared");
    }
    /**
     * Remove entries older than the instance default TTL.
     */
    cleanup() {
        const now = Date.now();
        let removed = 0;
        for (const [key, entry] of this.cache.entries()) {
            if (now - entry.timestamp > this.ttl) {
                this.cache.delete(key);
                removed++;
            }
        }
        if (removed > 0) {
            logger.info({ removed, remaining: this.cache.size }, "Cache cleanup completed");
        }
    }
    /**
     * Snapshot of the cache configuration, hit/miss counters and, per entry,
     * its key, age in milliseconds and hit count.
     */
    getStats() {
        const now = Date.now();
        const entries = Array.from(this.cache.entries()).map(([key, entry]) => ({
            key,
            age: now - entry.timestamp,
            hits: entry.hits,
        }));
        return {
            size: this.cache.size,
            maxSize: this.maxSize,
            ttl: this.ttl,
            hits: this.totalHits,
            misses: this.totalMisses,
            entries,
        };
    }
}
110
+ // Note: no singleton instance is exported; create a SearchCache per use with the desired TTL.
111
+ //# sourceMappingURL=cache.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,MAAM,MAAM,aAAa,CAAC;AAQjC,MAAM,OAAO,WAAW;IAOtB,YAAY,UAAkB,GAAG,EAAE,MAAc,MAAM;QAH/C,cAAS,GAAW,CAAC,CAAC;QACtB,gBAAW,GAAW,CAAC,CAAC;QAG9B,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAC;QACvB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,KAAa,EAAE,KAAc;QAC/C,OAAO,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,IAAI,KAAK,IAAI,EAAE,EAAE,CAAC;IACxD,CAAC;IAED;;QAEI;IACH,GAAG,CAAC,KAAa,EAAE,KAAc,EAAE,GAAY;QAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAElC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,IAAI,CAAC,WAAW,EAAE,CAAC;YACnB,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,EAAE,YAAY,CAAC,CAAC;YACtC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,4BAA4B;QAC5B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,CAAC;QACzC,MAAM,YAAY,GAAG,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC;QACrC,IAAI,GAAG,GAAG,YAAY,EAAE,CAAC;YACvB,IAAI,CAAC,WAAW,EAAE,CAAC;YACnB,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,YAAY,EAAE,EAAE,qBAAqB,CAAC,CAAC;YAClE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACvB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,mBAAmB;QACnB,KAAK,CAAC,IAAI,EAAE,CAAC;QACb,IAAI,CAAC,SAAS,EAAE,CAAC;QACjB,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,WAAW,CAAC,CAAC;QAC3D,OAAO,KAAK,CAAC,IAAI,CAAC;IACpB,CAAC;IAED;;QAEI;IACH,GAAG,CAAC,KAAa,EAAE,IAAoB,EAAE,KAAc,EAAE,GAAY;QACpE,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAE3C,qDAAqD;QACrD,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5D,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;YAChD,IAAI,QAAQ,EAAE,CAAC;gBACb,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;gBAC5B,MAAM,CAAC,KAAK,CAAC,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,gBAAgB,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE;YAClB,IAAI;YACJ
,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,IAAI,EAAE,CAAC;SACR,CAAC,CAAC;QAEH,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,sBAAsB,CAAC,CAAC;IAC9E,CAAC;IAED;;OAEG;IACH,KAAK;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;QAC7B,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,EAAE,eAAe,CAAC,CAAC;IACzD,CAAC;IAED;;OAEG;IACH,OAAO;QACL,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YAChD,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBACrC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACvB,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QAED,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YAChB,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,yBAAyB,CAAC,CAAC;QAClF,CAAC;IACH,CAAC;IAED;;OAEG;IACH,QAAQ;QAQN,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;YACtE,GAAG;YACH,GAAG,EAAE,GAAG,GAAG,KAAK,CAAC,SAAS;YAC1B,IAAI,EAAE,KAAK,CAAC,IAAI;SACjB,CAAC,CAAC,CAAC;QAEJ,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI;YACrB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,IAAI,EAAE,IAAI,CAAC,SAAS;YACpB,MAAM,EAAE,IAAI,CAAC,WAAW;YACxB,OAAO;SACR,CAAC;IACJ,CAAC;CACF;AAED,+DAA+D"}
@@ -0,0 +1,26 @@
1
/**
 * Base error class for search operations.
 *
 * Extends the built-in Error with a string code and a retryable flag, both
 * supplied through the constructor.
 */
export declare class SearchError extends Error {
    /** Error code passed to the constructor. */
    code: string;
    /** Retry flag passed to the constructor (the argument is optional). */
    retryable: boolean;
    /**
     * @param message Error message forwarded to Error.
     * @param code Error code stored on the instance.
     * @param retryable Optional retry flag stored on the instance.
     */
    constructor(message: string, code: string, retryable?: boolean);
}
9
/**
 * Error thrown when CAPTCHA verification is required.
 *
 * A SearchError subtype constructed from a message only.
 */
export declare class CaptchaError extends SearchError {
    /** @param message Error message. */
    constructor(message: string);
}
15
/**
 * Error thrown when network issues occur.
 *
 * A SearchError subtype constructed from a message only.
 */
export declare class NetworkError extends SearchError {
    /** @param message Error message. */
    constructor(message: string);
}
21
/**
 * Manages retry logic for operations that may fail.
 */
export declare class RetryManager {
    /**
     * Run an asynchronous operation, retrying it on failure.
     *
     * @param operation Function producing the promise to await.
     * @param maxRetries Optional retry count.
     * @param baseDelay Optional base delay between attempts.
     * @returns The value the operation resolves with.
     */
    executeWithRetry<T>(
        operation: () => Promise<T>,
        maxRetries?: number,
        baseDelay?: number,
    ): Promise<T>;
}
package/dist/errors.js ADDED
@@ -0,0 +1,48 @@
1
/**
 * Root of the search error hierarchy.
 *
 * Carries a string code and a retryable flag next to the usual Error
 * message; the instance name is always 'SearchError'.
 */
export class SearchError extends Error {
    /**
     * @param message Human-readable description, forwarded to Error.
     * @param code Error code stored on the instance.
     * @param retryable Retry flag stored on the instance; defaults to false.
     */
    constructor(message, code, retryable = false) {
        super(message);
        // Same assignment order as three separate statements: code, retryable, name.
        Object.assign(this, { code, retryable, name: 'SearchError' });
    }
}
12
/**
 * Raised when a CAPTCHA verification is required.
 *
 * Always constructed with code 'CAPTCHA_REQUIRED' and marked retryable.
 */
export class CaptchaError extends SearchError {
    /** @param message Human-readable description. */
    constructor(message) {
        const code = 'CAPTCHA_REQUIRED';
        const retryable = true;
        super(message, code, retryable);
    }
}
20
/**
 * Raised when a network problem occurs.
 *
 * Always constructed with code 'NETWORK_ERROR' and marked retryable.
 */
export class NetworkError extends SearchError {
    /** @param message Human-readable description. */
    constructor(message) {
        const code = 'NETWORK_ERROR';
        const retryable = true;
        super(message, code, retryable);
    }
}
28
/**
 * Retry helper for asynchronous operations that may fail.
 */
export class RetryManager {
    /**
     * Await `operation`, retrying with exponential backoff while it fails
     * with a retryable SearchError.
     *
     * @param operation Function returning the promise to await.
     * @param maxRetries Retries allowed after the first attempt (default 3).
     * @param baseDelay Delay in milliseconds before the first retry; doubled
     *                  for every further attempt (default 1000).
     * @returns The value the operation resolves with.
     * @throws The operation's error once retries are exhausted or the error
     *         is not a retryable SearchError.
     */
    async executeWithRetry(operation, maxRetries = 3, baseDelay = 1000) {
        const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));
        let attempt = 0;
        while (attempt <= maxRetries) {
            try {
                return await operation();
            }
            catch (error) {
                // Evaluated left to right: the error type is only inspected
                // while attempts remain.
                const mayRetry = attempt !== maxRetries &&
                    error instanceof SearchError &&
                    error.retryable;
                if (!mayRetry) {
                    throw error;
                }
                await pause(baseDelay * Math.pow(2, attempt));
            }
            attempt++;
        }
        // Reached only when the loop body never ran (e.g. negative maxRetries).
        throw new Error('Retry logic error: should not reach here');
    }
}
48
+ //# sourceMappingURL=errors.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,OAAO,WAAY,SAAQ,KAAK;IACpC,YAAY,OAAe,EAAS,IAAY,EAAS,YAAqB,KAAK;QACjF,KAAK,CAAC,OAAO,CAAC,CAAC;QADmB,SAAI,GAAJ,IAAI,CAAQ;QAAS,cAAS,GAAT,SAAS,CAAiB;QAEjF,IAAI,CAAC,IAAI,GAAG,aAAa,CAAC;IAC5B,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,YAAa,SAAQ,WAAW;IAC3C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,kBAAkB,EAAE,IAAI,CAAC,CAAC;IAC3C,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,YAAa,SAAQ,WAAW;IAC3C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,eAAe,EAAE,IAAI,CAAC,CAAC;IACxC,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,YAAY;IACvB,KAAK,CAAC,gBAAgB,CACpB,SAA2B,EAC3B,aAAqB,CAAC,EACtB,YAAoB,IAAI;QAExB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,OAAO,MAAM,SAAS,EAAE,CAAC;YAC3B,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,IAAI,OAAO,KAAK,UAAU,IAAI,CAAC,CAAC,KAAK,YAAY,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;oBAClF,MAAM,KAAK,CAAC;gBACd,CAAC;gBACD,MAAM,KAAK,GAAG,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBAC/C,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;IAC9D,CAAC;CACF"}
@@ -0,0 +1,48 @@
1
+ import { SearchResult } from './types.js';
2
/**
 * Criteria accepted by ResultFilter.filter. Every field is optional.
 */
export interface FilterOptions {
    /** Keep only results whose domain matches one of these (case-insensitive substring match). */
    includeDomains?: string[];
    /** Drop results whose domain matches one of these (case-insensitive substring match). */
    excludeDomains?: string[];
    /** Minimum snippet length; a missing snippetLength counts as 0. */
    minSnippetLength?: number;
    /** Maximum snippet length; a missing snippetLength counts as 0. */
    maxSnippetLength?: number;
    /** When true, keep only results flagged as having a rich snippet. */
    onlyRichSnippets?: boolean;
    /** Keep at most this many results (applied after skipFirst). */
    topN?: number;
    /** Number of leading results to drop. */
    skipFirst?: number;
    /** Keywords that must all appear in title + snippet (case-insensitive). */
    mustInclude?: string[];
    /** Keywords of which none may appear in title + snippet (case-insensitive). */
    mustExclude?: string[];
    /** Content type to keep, detected from the result URL; 'all' disables the check. */
    contentType?: 'all' | 'html' | 'pdf' | 'doc' | 'video' | 'image';
    /** NOTE(review): not read by ResultFilter.filter in filters.js; confirm intended use. */
    dateRange?: { from?: Date; to?: Date };
    /** NOTE(review): not read by ResultFilter.filter in filters.js; confirm intended use. */
    minRelevanceScore?: number;
}
19
/**
 * Aggregate statistics returned by ResultFilter.getStats.
 */
export interface FilterStats {
    /** Number of results passed in. */
    totalResults: number;
    /** Number of results after filtering (getStats reports the same count as totalResults). */
    filteredResults: number;
    /** Result count per domain; results without a domain are counted under 'unknown'. */
    domainDistribution: Record<string, number>;
    /** Result count per URL-detected content type. */
    contentTypeDistribution: Record<string, number>;
    /** Number of results flagged as having a rich snippet. */
    richSnippetCount: number;
    /** Mean snippetLength across the results; 0 for an empty list. */
    averageSnippetLength: number;
    /** NOTE(review): not populated by getStats in filters.js; confirm intended use. */
    dateRange?: { earliest?: string; latest?: string };
}
31
/**
 * Static helpers for filtering, sorting, summarising and scoring search results.
 */
export declare class ResultFilter {
    /**
     * Filter search results based on various criteria.
     *
     * @param results Results to filter.
     * @param options Criteria to apply.
     * @returns The results that satisfy the criteria.
     */
    static filter(results: SearchResult[], options: FilterOptions): SearchResult[];
    /**
     * Sort search results by various criteria.
     *
     * @param results Results to sort.
     * @param sortBy Field to order by.
     * @param order Optional sort direction.
     * @returns The sorted results.
     */
    static sort(
        results: SearchResult[],
        sortBy: 'position' | 'snippetLength' | 'domain',
        order?: 'asc' | 'desc',
    ): SearchResult[];
    /**
     * Get comprehensive statistics about filtered results.
     *
     * @param results Results to summarise.
     */
    static getStats(results: SearchResult[]): FilterStats;
    /**
     * Add relevance scoring to results (basic implementation).
     *
     * @param results Results to score.
     * @param query Query the scores are computed against.
     */
    static addRelevanceScores(results: SearchResult[], query: string): SearchResult[];
}
@@ -0,0 +1,192 @@
1
/** Domains that earn the small authority bonus in addRelevanceScores. */
const TRUSTED_DOMAINS = ['github.com', 'stackoverflow.com', 'wikipedia.org', 'docs.microsoft.com'];

/**
 * Classify a result URL as 'pdf', 'doc', 'video', 'image' or 'html'.
 *
 * File extensions are checked against the URL with any query string or
 * fragment removed, so "report.docx?dl=1" and "paper.pdf#page=2" are
 * recognised while hosts such as "api.docs.example.com" are not mistaken for
 * documents. Host and path markers (youtube.com, /video/, ...) are checked
 * against the whole URL. Anything unrecognised is 'html'.
 */
function classifyContentType(link) {
    const url = (link || '').toLowerCase();
    const path = url.split(/[?#]/, 1)[0];
    const hasExtension = (extensions) => extensions.some(ext => path.endsWith(ext));
    if (hasExtension(['.pdf'])) {
        return 'pdf';
    }
    if (hasExtension(['.doc', '.docx', '.txt'])) {
        return 'doc';
    }
    if (url.includes('youtube.com') || url.includes('vimeo.com') ||
        hasExtension(['.mp4', '.avi']) || url.includes('/video/')) {
        return 'video';
    }
    if (hasExtension(['.jpg', '.jpeg', '.png', '.gif', '.webp']) || url.includes('/image/')) {
        return 'image';
    }
    return 'html';
}

/**
 * True when the result domain contains, or is contained in, any of the given
 * (already lower-cased) domains.
 */
function domainMatchesAny(resultDomain, domains) {
    const candidate = resultDomain.toLowerCase();
    return domains.some(domain => candidate.includes(domain) || domain.includes(candidate));
}

/**
 * Static helpers for filtering, sorting, summarising and scoring search results.
 */
export class ResultFilter {
    /**
     * Filter search results based on various criteria.
     *
     * Criteria are applied in this order: include/exclude domains, snippet
     * length bounds, rich-snippet flag, content type, required/forbidden
     * keywords, then skipFirst and topN. The input array is not modified.
     * `dateRange` and `minRelevanceScore` are not applied by this method.
     *
     * @param results Results to filter.
     * @param options Criteria; absent fields are skipped.
     * @returns A new array with the results that pass every criterion.
     */
    static filter(results, options) {
        let filtered = [...results];
        // Domain filtering: results without a domain fail an include list
        // and pass an exclude list.
        if (options.includeDomains && options.includeDomains.length > 0) {
            const domains = options.includeDomains.map(d => d.toLowerCase());
            filtered = filtered.filter(result => result.domain && domainMatchesAny(result.domain, domains));
        }
        if (options.excludeDomains && options.excludeDomains.length > 0) {
            const domains = options.excludeDomains.map(d => d.toLowerCase());
            filtered = filtered.filter(result => !result.domain || !domainMatchesAny(result.domain, domains));
        }
        // Snippet length filtering
        if (options.minSnippetLength !== undefined) {
            filtered = filtered.filter(result => (result.snippetLength || 0) >= options.minSnippetLength);
        }
        if (options.maxSnippetLength !== undefined) {
            filtered = filtered.filter(result => (result.snippetLength || 0) <= options.maxSnippetLength);
        }
        // Rich snippet filtering
        if (options.onlyRichSnippets) {
            filtered = filtered.filter(result => result.hasRichSnippet);
        }
        // Content type filtering (URL-based detection shared with getStats)
        if (options.contentType && options.contentType !== 'all') {
            // Unrecognised values are treated as 'html'.
            const wanted = ['pdf', 'doc', 'video', 'image'].includes(options.contentType)
                ? options.contentType
                : 'html';
            filtered = filtered.filter(result => classifyContentType(result.link) === wanted);
        }
        // Keyword filtering
        if (options.mustInclude && options.mustInclude.length > 0) {
            filtered = filtered.filter(result => {
                const text = (result.title + ' ' + result.snippet).toLowerCase();
                return options.mustInclude.every(keyword => text.includes(keyword.toLowerCase()));
            });
        }
        if (options.mustExclude && options.mustExclude.length > 0) {
            filtered = filtered.filter(result => {
                const text = (result.title + ' ' + result.snippet).toLowerCase();
                return !options.mustExclude.some(keyword => text.includes(keyword.toLowerCase()));
            });
        }
        // Position-based filtering
        if (options.skipFirst && options.skipFirst > 0) {
            filtered = filtered.slice(options.skipFirst);
        }
        if (options.topN && options.topN > 0) {
            filtered = filtered.slice(0, options.topN);
        }
        return filtered;
    }
    /**
     * Sort search results by various criteria.
     *
     * @param results Results to sort; the input array is not modified.
     * @param sortBy 'position', 'snippetLength' or 'domain'; missing values
     *               compare as 0 (numbers) or '' (domain).
     * @param order 'asc' (default) or 'desc'.
     * @returns A new, sorted array.
     */
    static sort(results, sortBy, order = 'asc') {
        return [...results].sort((a, b) => {
            let comparison = 0;
            switch (sortBy) {
                case 'position':
                    comparison = (a.position || 0) - (b.position || 0);
                    break;
                case 'snippetLength':
                    comparison = (a.snippetLength || 0) - (b.snippetLength || 0);
                    break;
                case 'domain':
                    comparison = (a.domain || '').localeCompare(b.domain || '');
                    break;
            }
            return order === 'desc' ? -comparison : comparison;
        });
    }
    /**
     * Get comprehensive statistics about filtered results.
     *
     * @param results Results to summarise.
     * @returns Counts per domain and per content type, the rich-snippet count
     *          and the average snippet length (0 for an empty list).
     *          totalResults and filteredResults both report results.length.
     */
    static getStats(results) {
        const domainDistribution = {};
        // 'other' is kept in the shape for compatibility; the classifier
        // never produces it.
        const contentTypeDistribution = {
            html: 0,
            pdf: 0,
            doc: 0,
            video: 0,
            image: 0,
            other: 0
        };
        let richSnippetCount = 0;
        let totalSnippetLength = 0;
        results.forEach(result => {
            // Domain distribution
            const domain = result.domain || 'unknown';
            domainDistribution[domain] = (domainDistribution[domain] || 0) + 1;
            // Content type distribution
            contentTypeDistribution[classifyContentType(result.link)]++;
            // Rich snippet count
            if (result.hasRichSnippet) {
                richSnippetCount++;
            }
            // Snippet length
            totalSnippetLength += result.snippetLength || 0;
        });
        return {
            totalResults: results.length,
            filteredResults: results.length,
            domainDistribution,
            contentTypeDistribution,
            richSnippetCount,
            averageSnippetLength: results.length > 0 ? totalSnippetLength / results.length : 0,
        };
    }
    /**
     * Add relevance scoring to results (basic implementation).
     *
     * Per query word longer than two characters: +10 for a title match and +5
     * for a snippet match. Then max(0, 20 - position) when a position is set,
     * +5 for a rich snippet and +3 for a trusted domain.
     *
     * @param results Results to score; a missing title or snippet counts as empty text.
     * @param query Query whose words are matched against title and snippet.
     * @returns Copies of the results with a `relevanceScore` field added.
     */
    static addRelevanceScores(results, query) {
        const queryWords = query.toLowerCase().split(/\s+/).filter(word => word.length > 2);
        return results.map(result => {
            let score = 0;
            const title = (result.title || '').toLowerCase();
            const snippet = (result.snippet || '').toLowerCase();
            // Title matches are most important
            queryWords.forEach(word => {
                if (title.includes(word)) {
                    score += 10;
                }
                if (snippet.includes(word)) {
                    score += 5;
                }
            });
            // Position bonus (earlier results are more relevant)
            if (result.position) {
                score += Math.max(0, 20 - result.position);
            }
            // Rich snippet bonus
            if (result.hasRichSnippet) {
                score += 5;
            }
            // Domain authority bonus (basic heuristic)
            if (result.domain && TRUSTED_DOMAINS.some(domain => result.domain.includes(domain))) {
                score += 3;
            }
            return {
                ...result,
                relevanceScore: score
            };
        });
    }
}
192
+ //# sourceMappingURL=filters.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"filters.js","sourceRoot":"","sources":["../src/filters.ts"],"names":[],"mappings":"AAiCA,MAAM,OAAO,YAAY;IACvB;;OAEG;IACH,MAAM,CAAC,MAAM,CAAC,OAAuB,EAAE,OAAsB;QAC3D,IAAI,QAAQ,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC;QAE5B,mBAAmB;QACnB,IAAI,OAAO,CAAC,cAAc,IAAI,OAAO,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChE,MAAM,OAAO,GAAG,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YACjE,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAClC,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CACrC,MAAM,CAAC,MAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC7C,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAO,CAAC,WAAW,EAAE,CAAC,CAC9C,CACF,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,CAAC,cAAc,IAAI,OAAO,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChE,MAAM,OAAO,GAAG,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YACjE,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAClC,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CACvC,MAAM,CAAC,MAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC7C,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAO,CAAC,WAAW,EAAE,CAAC,CAC9C,CACF,CAAC;QACJ,CAAC;QAED,2BAA2B;QAC3B,IAAI,OAAO,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;YAC3C,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAClC,CAAC,MAAM,CAAC,aAAa,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,gBAAiB,CACzD,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;YAC3C,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAClC,CAAC,MAAM,CAAC,aAAa,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,gBAAiB,CACzD,CAAC;QACJ,CAAC;QAED,yBAAyB;QACzB,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;YAC7B,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;QAC9D,CAAC;QAED,qDAAqD;QACrD,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,KAAK,KAAK,EAAE,CAAC;YACzD,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE;gBAClC,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;gBACtC,QAAQ,OAAO,CAAC,WAAW,EAAE,CAAC;oBAC5B,KAAK,KAAK;wBACR,OAAO,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;oBACvD,KAAK,KAAK;wBACR,OAAO,GAAG,CA
AC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC;4BAC7C,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;oBACtD,KAAK,OAAO;wBACV,OAAO,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC;4BACxD,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC;4BAC5C,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;oBACjC,KAAK,OAAO;wBACV,OAAO,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC;4BAC7C,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC;4BAC5C,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;oBAC1D,KAAK,MAAM,CAAC;oBACZ;wBACE,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC;4BAC9C,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC;4BAC/C,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC;4BACrD,CAAC,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;gBACtC,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,oBAAoB;QACpB,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1D,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE;gBAClC,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,GAAG,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;gBACjE,OAAO,OAAO,CAAC,WAAY,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAC1C,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CACrC,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;QAED,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1D,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE;gBAClC,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,GAAG,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;gBACjE,OAAO,CAAC,OAAO,CAAC,WAAY,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAC1C,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CACrC,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;QAED,2BAA2B;QAC3B,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;YAC/C,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAC/C,CAAC;QAED,IAAI,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YACrC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;Q
AC7C,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,IAAI,CAAC,OAAuB,EAAE,MAA+C,EAAE,QAAwB,KAAK;QACjH,OAAO,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAChC,IAAI,UAAU,GAAG,CAAC,CAAC;YAEnB,QAAQ,MAAM,EAAE,CAAC;gBACf,KAAK,UAAU;oBACb,UAAU,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;oBACnD,MAAM;gBACR,KAAK,eAAe;oBAClB,UAAU,GAAG,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC,CAAC,CAAC;oBAC7D,MAAM;gBACR,KAAK,QAAQ;oBACX,UAAU,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC;oBAC5D,MAAM;YACV,CAAC;YAED,OAAO,KAAK,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC;QACrD,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,QAAQ,CAAC,OAAuB;QACrC,MAAM,kBAAkB,GAA2B,EAAE,CAAC;QACtD,MAAM,uBAAuB,GAA2B;YACtD,IAAI,EAAE,CAAC;YACP,GAAG,EAAE,CAAC;YACN,GAAG,EAAE,CAAC;YACN,KAAK,EAAE,CAAC;YACR,KAAK,EAAE,CAAC;YACR,KAAK,EAAE,CAAC;SACT,CAAC;QAEF,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,kBAAkB,GAAG,CAAC,CAAC;QAE3B,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACvB,sBAAsB;YACtB,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,SAAS,CAAC;YAC1C,kBAAkB,CAAC,MAAM,CAAC,GAAG,CAAC,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YAEnE,4BAA4B;YAC5B,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtC,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClD,uBAAuB,CAAC,GAAG,EAAE,CAAC;YAChC,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBACjF,uBAAuB,CAAC,GAAG,EAAE,CAAC;YAChC,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC;gBACxD,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACnF,uBAAuB,CAAC,KAAK,EAAE,CAAC;YAClC,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC;gBACrE,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,
CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACpF,uBAAuB,CAAC,KAAK,EAAE,CAAC;YAClC,CAAC;iBAAM,CAAC;gBACN,uBAAuB,CAAC,IAAI,EAAE,CAAC;YACjC,CAAC;YAED,qBAAqB;YACrB,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;gBAC1B,gBAAgB,EAAE,CAAC;YACrB,CAAC;YAED,iBAAiB;YACjB,kBAAkB,IAAI,MAAM,CAAC,aAAa,IAAI,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,OAAO;YACL,YAAY,EAAE,OAAO,CAAC,MAAM;YAC5B,eAAe,EAAE,OAAO,CAAC,MAAM;YAC/B,kBAAkB;YAClB,uBAAuB;YACvB,gBAAgB;YAChB,oBAAoB,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;SACnF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,kBAAkB,CAAC,OAAuB,EAAE,KAAa;QAC9D,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEpF,OAAO,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE;YAC1B,IAAI,KAAK,GAAG,CAAC,CAAC;YACd,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YAE7C,mCAAmC;YACnC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;gBACxB,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,KAAK,IAAI,EAAE,CAAC;gBACd,CAAC;gBACD,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC3B,KAAK,IAAI,CAAC,CAAC;gBACb,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,qDAAqD;YACrD,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACpB,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC7C,CAAC;YAED,qBAAqB;YACrB,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;gBAC1B,KAAK,IAAI,CAAC,CAAC;YACb,CAAC;YAED,2CAA2C;YAC3C,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;gBAClB,MAAM,cAAc,GAAG,CAAC,YAAY,EAAE,mBAAmB,EAAE,eAAe,EAAE,oBAAoB,CAAC,CAAC;gBAClG,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;oBACnE,KAAK,IAAI,CAAC,CAAC;gBACb,CAAC;YACH,CAAC;YAED,OAAO;gBACL,GAAG,MAAM;gBACT,cAAc,EAAE,KAAK;aACuB,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC;CACF"}
@@ -0,0 +1,62 @@
1
/**
 * Statistics returned by HtmlCleaner.getCleaningStats.
 *
 * NOTE(review): the implementation is not part of this declaration file;
 * the field notes follow the names only. Confirm against html-cleaner.js.
 */
export interface CleaningStats {
    /** Presumably the size of the original HTML; unit to be confirmed. */
    originalSize: number;
    /** Presumably the size of the cleaned HTML; unit to be confirmed. */
    cleanedSize: number;
    /** Presumably the size reduction as a percentage; confirm. */
    reductionPercent: number;
    /** Presumably the line count of the original HTML; confirm. */
    originalLines: number;
    /** Presumably the line count of the cleaned HTML; confirm. */
    cleanedLines: number;
    /** Per-category counters of removed elements. */
    elementsRemoved: {
        scripts: number;
        styles: number;
        images: number;
        ads: number;
        navigation: number;
        comments: number;
    };
    /** Findings about the content of the HTML. */
    contentAnalysis: {
        hasSearchResults: boolean;
        resultCount: number;
        hasRichSnippets: boolean;
        hasAds: boolean;
        readabilityScore: number;
    };
}
23
/**
 * Static helpers for cleaning and inspecting search-result HTML.
 */
export declare class HtmlCleaner {
    /**
     * Extract and clean search results HTML for optimal LLM consumption.
     *
     * @param html Raw HTML.
     * @returns The cleaned HTML string.
     */
    static extractSearchResults(html: string): string;
    /**
     * Get comprehensive cleaning statistics.
     *
     * @param originalHtml HTML before cleaning.
     * @param cleanedHtml HTML after cleaning.
     */
    static getCleaningStats(originalHtml: string, cleanedHtml: string): CleaningStats;
    /** Detect if HTML contains search results. */
    private static detectSearchResults;
    /** Count approximate number of search results. */
    private static countSearchResults;
    /** Detect rich snippets and structured data. */
    private static detectRichSnippets;
    /** Calculate basic readability score. */
    private static calculateReadabilityScore;
    /**
     * Extract structured data from HTML (JSON-LD, microdata, etc.).
     *
     * @param html HTML to scan.
     * @returns The extracted items, untyped.
     */
    static extractStructuredData(html: string): any[];
    /**
     * Extract meta information from HTML.
     *
     * @param html HTML to scan.
     * @returns An object whose fields are all optional.
     */
    static extractMetaInfo(html: string): {
        title?: string;
        description?: string;
        keywords?: string;
        robots?: string;
        canonical?: string;
    };
}