llm-search-tools 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +244 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/dist/integration.test.d.ts +1 -0
- package/dist/integration.test.js +237 -0
- package/dist/modules/answerbox.test.d.ts +1 -0
- package/dist/modules/answerbox.test.js +105 -0
- package/dist/modules/autocomplete.d.ts +11 -0
- package/dist/modules/autocomplete.js +159 -0
- package/dist/modules/autocomplete.test.d.ts +1 -0
- package/dist/modules/autocomplete.test.js +188 -0
- package/dist/modules/common.d.ts +26 -0
- package/dist/modules/common.js +263 -0
- package/dist/modules/common.test.d.ts +1 -0
- package/dist/modules/common.test.js +87 -0
- package/dist/modules/crawl.d.ts +9 -0
- package/dist/modules/crawl.js +117 -0
- package/dist/modules/crawl.test.d.ts +1 -0
- package/dist/modules/crawl.test.js +48 -0
- package/dist/modules/events.d.ts +8 -0
- package/dist/modules/events.js +129 -0
- package/dist/modules/events.test.d.ts +1 -0
- package/dist/modules/events.test.js +104 -0
- package/dist/modules/finance.d.ts +10 -0
- package/dist/modules/finance.js +20 -0
- package/dist/modules/finance.test.d.ts +1 -0
- package/dist/modules/finance.test.js +77 -0
- package/dist/modules/flights.d.ts +8 -0
- package/dist/modules/flights.js +135 -0
- package/dist/modules/flights.test.d.ts +1 -0
- package/dist/modules/flights.test.js +128 -0
- package/dist/modules/hackernews.d.ts +8 -0
- package/dist/modules/hackernews.js +87 -0
- package/dist/modules/hackernews.js.map +1 -0
- package/dist/modules/images.test.d.ts +1 -0
- package/dist/modules/images.test.js +145 -0
- package/dist/modules/integrations.test.d.ts +1 -0
- package/dist/modules/integrations.test.js +93 -0
- package/dist/modules/media.d.ts +11 -0
- package/dist/modules/media.js +132 -0
- package/dist/modules/media.test.d.ts +1 -0
- package/dist/modules/media.test.js +186 -0
- package/dist/modules/news.d.ts +3 -0
- package/dist/modules/news.js +39 -0
- package/dist/modules/news.test.d.ts +1 -0
- package/dist/modules/news.test.js +88 -0
- package/dist/modules/parser.d.ts +19 -0
- package/dist/modules/parser.js +361 -0
- package/dist/modules/parser.test.d.ts +1 -0
- package/dist/modules/parser.test.js +151 -0
- package/dist/modules/reddit.d.ts +21 -0
- package/dist/modules/reddit.js +107 -0
- package/dist/modules/scrape.d.ts +16 -0
- package/dist/modules/scrape.js +272 -0
- package/dist/modules/scrape.test.d.ts +1 -0
- package/dist/modules/scrape.test.js +232 -0
- package/dist/modules/scraper.d.ts +12 -0
- package/dist/modules/scraper.js +640 -0
- package/dist/modules/scrapers/anidb.d.ts +8 -0
- package/dist/modules/scrapers/anidb.js +156 -0
- package/dist/modules/scrapers/duckduckgo.d.ts +6 -0
- package/dist/modules/scrapers/duckduckgo.js +284 -0
- package/dist/modules/scrapers/google-news.d.ts +2 -0
- package/dist/modules/scrapers/google-news.js +60 -0
- package/dist/modules/scrapers/google.d.ts +6 -0
- package/dist/modules/scrapers/google.js +211 -0
- package/dist/modules/scrapers/searxng.d.ts +2 -0
- package/dist/modules/scrapers/searxng.js +93 -0
- package/dist/modules/scrapers/thetvdb.d.ts +3 -0
- package/dist/modules/scrapers/thetvdb.js +147 -0
- package/dist/modules/scrapers/tmdb.d.ts +3 -0
- package/dist/modules/scrapers/tmdb.js +172 -0
- package/dist/modules/scrapers/yahoo-finance.d.ts +2 -0
- package/dist/modules/scrapers/yahoo-finance.js +33 -0
- package/dist/modules/search.d.ts +5 -0
- package/dist/modules/search.js +45 -0
- package/dist/modules/search.js.map +1 -0
- package/dist/modules/search.test.d.ts +1 -0
- package/dist/modules/search.test.js +219 -0
- package/dist/modules/urbandictionary.d.ts +12 -0
- package/dist/modules/urbandictionary.js +26 -0
- package/dist/modules/webpage.d.ts +4 -0
- package/dist/modules/webpage.js +150 -0
- package/dist/modules/webpage.js.map +1 -0
- package/dist/modules/wikipedia.d.ts +5 -0
- package/dist/modules/wikipedia.js +85 -0
- package/dist/modules/wikipedia.js.map +1 -0
- package/dist/scripts/interactive-search.d.ts +1 -0
- package/dist/scripts/interactive-search.js +98 -0
- package/dist/test.d.ts +1 -0
- package/dist/test.js +179 -0
- package/dist/test.js.map +1 -0
- package/dist/testBraveSearch.d.ts +1 -0
- package/dist/testBraveSearch.js +34 -0
- package/dist/testDuckDuckGo.d.ts +1 -0
- package/dist/testDuckDuckGo.js +52 -0
- package/dist/testEcosia.d.ts +1 -0
- package/dist/testEcosia.js +57 -0
- package/dist/testSearchModule.d.ts +1 -0
- package/dist/testSearchModule.js +95 -0
- package/dist/testwebpage.d.ts +1 -0
- package/dist/testwebpage.js +81 -0
- package/dist/types.d.ts +174 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/createTestDocx.d.ts +1 -0
- package/dist/utils/createTestDocx.js +58 -0
- package/dist/utils/htmlcleaner.d.ts +20 -0
- package/dist/utils/htmlcleaner.js +172 -0
- package/docs/README.md +275 -0
- package/docs/autocomplete.md +73 -0
- package/docs/crawling.md +88 -0
- package/docs/events.md +58 -0
- package/docs/examples.md +158 -0
- package/docs/finance.md +60 -0
- package/docs/flights.md +71 -0
- package/docs/hackernews.md +121 -0
- package/docs/media.md +87 -0
- package/docs/news.md +75 -0
- package/docs/parser.md +197 -0
- package/docs/scraper.md +347 -0
- package/docs/search.md +106 -0
- package/docs/wikipedia.md +91 -0
- package/package.json +97 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { AutocompleteOptions, AutocompleteResult } from "../types";
|
|
2
|
+
/** Names of the suggestion backends that getSuggestions can dispatch to. */
type Provider = "google" | "duckduckgo" | "yahoo" | "brave" | "yandex" | "ecosia" | "startpage" | "qwant" | "swisscows";
|
|
3
|
+
/**
|
|
4
|
+
* Get autocomplete suggestions for a query
|
|
5
|
+
* @param query The search query
|
|
6
|
+
* @param provider The search provider to use (default: duckduckgo); a value that matches no known provider also falls back to duckduckgo
|
|
7
|
+
* @param options Options for the request (the implementation reads timeout, proxy and limit)
|
|
8
|
+
* @returns Promise<AutocompleteResult>
* Rejects with a value carrying `message`, `code: "AUTOCOMPLETE_ERROR"` and `originalError` when the provider request fails.
|
|
9
|
+
*/
|
|
10
|
+
export declare function getSuggestions(query: string, provider?: Provider, options?: AutocompleteOptions): Promise<AutocompleteResult>;
|
|
11
|
+
export {};
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getSuggestions = getSuggestions;
|
|
4
|
+
const common_1 = require("./common");
|
|
5
|
+
/**
 * Get autocomplete suggestions for a query.
 *
 * Dispatches to the provider-specific fetcher. A provider value that matches
 * none of the known names falls back to DuckDuckGo.
 *
 * @param query The search query
 * @param provider The search provider to use (default: duckduckgo)
 * @param options Options for the request (timeout, proxy, limit)
 * @returns Promise<AutocompleteResult>
 * @throws {Error} Wraps any failure in an Error whose `code` is
 *   "AUTOCOMPLETE_ERROR" and whose `originalError` (and `cause`) is the
 *   underlying failure.
 */
async function getSuggestions(query, provider = "duckduckgo", options = {}) {
    try {
        switch (provider) {
            case "google":
                return await getGoogleSuggestions(query, options);
            case "yahoo":
                return await getYahooSuggestions(query, options);
            case "brave":
                return await getBraveSuggestions(query, options);
            case "yandex":
                return await getYandexSuggestions(query, options);
            case "ecosia":
                return await getEcosiaSuggestions(query, options);
            case "startpage":
                return await getStartpageSuggestions(query, options);
            case "qwant":
                return await getQwantSuggestions(query, options);
            case "swisscows":
                return await getSwisscowsSuggestions(query, options);
            case "duckduckgo":
            default:
                return await getDuckDuckGoSuggestions(query, options);
        }
    }
    catch (error) {
        // Non-Error rejections (strings, null) have no `.message`; describe them directly.
        const reason = error?.message ?? String(error);
        // A real Error keeps the stack trace; `code` and `originalError` stay
        // available under the same property names as before.
        const wrapped = new Error(`Failed to get suggestions from ${provider}: ${reason}`, { cause: error });
        wrapped.code = "AUTOCOMPLETE_ERROR";
        wrapped.originalError = error;
        throw wrapped;
    }
}
|
|
45
|
+
/**
 * Fetch `url` through fetchWithDetection and parse the response body as JSON.
 *
 * Only `timeout` and `proxy` are forwarded from `options`.
 *
 * @param url Endpoint to request
 * @param options Autocomplete options
 * @returns The parsed JSON payload
 */
async function fetchJson(url, options) {
    const requestOptions = {
        timeout: options.timeout,
        proxy: options.proxy,
    };
    const response = await (0, common_1.fetchWithDetection)(url, requestOptions);
    return JSON.parse(response.body);
}
|
|
53
|
+
/**
 * Google: fetch suggestions from the suggestqueries endpoint (`client=firefox`).
 *
 * Expected response format: ["query", ["sugg1", "sugg2", ...]]
 *
 * @param query The search query
 * @param options Request options; `limit` caps the number of suggestions
 * @returns { query, suggestions, source: "google" }
 */
async function getGoogleSuggestions(query, options) {
    // HTTPS so the user's query is not sent in clear text.
    const url = `https://suggestqueries.google.com/complete/search?client=firefox&q=${encodeURIComponent(query)}`;
    const data = await fetchJson(url, options);
    // Tolerate a missing or malformed suggestion list instead of throwing.
    const list = Array.isArray(data?.[1]) ? data[1] : [];
    return {
        query,
        suggestions: list.slice(0, options.limit),
        source: "google",
    };
}
|
|
64
|
+
/**
 * DuckDuckGo: fetch suggestions from the `/ac/` endpoint.
 *
 * Expected response format: [{"phrase": "sugg1"}, {"phrase": "sugg2"}, ...]
 *
 * @param query The search query
 * @param options Request options; `limit` caps the number of suggestions
 * @returns { query, suggestions, source: "duckduckgo" }
 */
async function getDuckDuckGoSuggestions(query, options) {
    const url = `https://duckduckgo.com/ac/?kl=wt-wt&q=${encodeURIComponent(query)}`;
    const data = await fetchJson(url, options);
    // Like the other providers, treat an unexpected payload as "no suggestions"
    // instead of failing on `.map` of a non-array.
    const phrases = Array.isArray(data) ? data.map((item) => item.phrase) : [];
    return {
        query,
        suggestions: phrases.slice(0, options.limit),
        source: "duckduckgo",
    };
}
|
|
76
|
+
/**
 * Yahoo: fetch suggestions from the gossip endpoint.
 *
 * Expected response format: {"gossip": {"results": [{"key": "sugg1"}, ...]}}
 */
async function getYahooSuggestions(query, options) {
    const encodedQuery = encodeURIComponent(query);
    const payload = await fetchJson(`https://search.yahoo.com/sugg/gossip/gossip-us-fastbreak?output=sd1&command=${encodedQuery}`, options);
    const results = payload.gossip?.results || [];
    const keys = results.map((entry) => entry.key);
    return { query, suggestions: keys.slice(0, options.limit), source: "yahoo" };
}
|
|
88
|
+
/**
 * Brave: fetch suggestions from the search.brave.com suggest API.
 *
 * Expected response format: ["query", [entry, ...]] (OpenSearch-like).
 * NOTE(review): the URL requests `rich=true`; entries are then believed to be
 * objects such as {"q": "sugg1", "is_entity": false, ...} rather than strings.
 * Confirm against a live response. Both shapes are accepted here so that
 * `suggestions` is always a list of strings.
 *
 * @param query The search query
 * @param options Request options; `limit` caps the number of suggestions
 * @returns { query, suggestions, source: "brave" }
 */
async function getBraveSuggestions(query, options) {
    const url = `https://search.brave.com/api/suggest?rich=true&source=web&country=us&q=${encodeURIComponent(query)}`;
    const data = await fetchJson(url, options);
    const entries = Array.isArray(data?.[1]) ? data[1] : [];
    const suggestions = entries
        .map((entry) => (typeof entry === "string" ? entry : entry?.q))
        // Drop entries that carry no usable text.
        .filter((text) => typeof text === "string")
        .slice(0, options.limit);
    return {
        query,
        suggestions,
        source: "brave",
    };
}
|
|
100
|
+
/**
 * Yandex: fetch suggestions from the suggest-ya.cgi endpoint.
 *
 * Expected response format: ["query", ["sugg1", "sugg2", ...]]
 */
async function getYandexSuggestions(query, options) {
    const encodedQuery = encodeURIComponent(query);
    const payload = await fetchJson(`https://yandex.com/suggest/suggest-ya.cgi?srv=morda_com_desktop&wiz=TrWth&uil=en&fact=1&v=4&icon=1&part=${encodedQuery}`, options);
    const items = payload[1] || [];
    return { query, suggestions: items.slice(0, options.limit), source: "yandex" };
}
|
|
112
|
+
/**
 * Ecosia: fetch suggestions from ac.ecosia.org.
 *
 * Expected response format: {"suggestions": ["sugg1", "sugg2", ...]}
 */
async function getEcosiaSuggestions(query, options) {
    const endpoint = `https://ac.ecosia.org/?q=${encodeURIComponent(query)}`;
    const payload = await fetchJson(endpoint, options);
    const items = payload.suggestions || [];
    return { query, suggestions: items.slice(0, options.limit), source: "ecosia" };
}
|
|
124
|
+
/**
 * Startpage: fetch suggestions from the /suggestions endpoint.
 *
 * Expected response format: {"suggestions": [{"text": "sugg1"}, ...]}
 */
async function getStartpageSuggestions(query, options) {
    const endpoint = `https://www.startpage.com/suggestions?q=${encodeURIComponent(query)}`;
    const payload = await fetchJson(endpoint, options);
    const entries = payload.suggestions || [];
    const texts = entries.map((entry) => entry.text);
    return { query, suggestions: texts.slice(0, options.limit), source: "startpage" };
}
|
|
136
|
+
/**
 * Qwant: fetch suggestions from the v3 suggest API.
 *
 * Expected response format: {"data": {"items": [{"value": "sugg1"}, ...]}}
 */
async function getQwantSuggestions(query, options) {
    const endpoint = `https://api.qwant.com/v3/suggest?q=${encodeURIComponent(query)}`;
    const payload = await fetchJson(endpoint, options);
    const items = payload.data?.items || [];
    const values = items.map((entry) => entry.value);
    return { query, suggestions: values.slice(0, options.limit), source: "qwant" };
}
|
|
148
|
+
/**
 * Swisscows: fetch suggestions from the suggest API.
 *
 * `itemsCount` in the URL is `options.limit`, or 10 when no limit is given.
 * Expected response format: ["sugg1", "sugg2", ...]; anything that is not an
 * array yields no suggestions.
 */
async function getSwisscowsSuggestions(query, options) {
    const itemsCount = options.limit || 10;
    const payload = await fetchJson(`https://api.swisscows.com/suggest?locale=en-US&itemsCount=${itemsCount}&query=${encodeURIComponent(query)}`, options);
    const items = Array.isArray(payload) ? payload : [];
    return { query, suggestions: items.slice(0, options.limit), source: "swisscows" };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted helper: exposes property `key` of module `source` on
// `target` under the name `alias` (defaults to `key`). With Object.create
// available the binding is defined through a property descriptor; otherwise
// the current value is copied.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Fall back to a forwarding getter when there is no descriptor, when an
        // accessor comes from a non-ES module, or when a data property is
        // writable or configurable.
        var needsGetter = !descriptor || ("get" in descriptor
            ? !source.__esModule
            : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    });
|
|
13
|
+
// TypeScript-emitted helper: stores `value` as the `default` property of
// `target` (via defineProperty when Object.create exists, plain assignment otherwise).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });
|
|
18
|
+
// TypeScript-emitted helper for `import * as x`: returns ES modules untouched;
// for anything else builds a namespace object that re-exports every own
// property except "default" and sets `default` to the module itself.
var __importStar = (this && this.__importStar) || (function () {
    var resolvedOwnKeys;
    // Picks the key-listing implementation on first use and reuses it afterwards.
    function ownKeys(obj) {
        if (!resolvedOwnKeys) {
            resolvedOwnKeys = Object.getOwnPropertyNames || function (o) {
                var names = [];
                for (var name in o) {
                    if (Object.prototype.hasOwnProperty.call(o, name)) {
                        names[names.length] = name;
                    }
                }
                return names;
            };
        }
        return resolvedOwnKeys(obj);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = ownKeys(mod);
            for (var index = 0; index < keys.length; index++) {
                if (keys[index] !== "default") {
                    __createBinding(namespace, mod, keys[index]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
const vitest_1 = require("vitest");
|
|
37
|
+
const autocomplete_1 = require("./autocomplete");
|
|
38
|
+
const common = __importStar(require("./common"));
|
|
39
|
+
// Mock the common module
|
|
40
|
+
// Partial mock of ./common: every real export is kept (spread from
// importActual) and only fetchWithDetection is replaced with a vi.fn() stub.
vitest_1.vi.mock("./common", async () => {
|
|
41
|
+
const actual = await vitest_1.vi.importActual("./common");
|
|
42
|
+
return {
|
|
43
|
+
...actual,
|
|
44
|
+
fetchWithDetection: vitest_1.vi.fn(),
|
|
45
|
+
};
|
|
46
|
+
});
|
|
47
|
+
// Unit tests for getSuggestions: one case per provider, plus the default
// provider, error wrapping and the `limit` option. Network access is replaced
// by stubbing common.fetchWithDetection.
(0, vitest_1.describe)("Autocomplete Module", () => {
    const mockQuery = "test";
    const mockOptions = { limit: 5 };
    // Makes fetchWithDetection resolve with `payload` serialized as the response body.
    const stubFetchBody = (payload) => {
        const body = JSON.stringify(payload);
        return vitest_1.vi.spyOn(common, "fetchWithDetection").mockResolvedValue({
            body,
            headers: new Headers(),
        });
    };
    const twoSuggestions = ["test speed", "test internet"];
    (0, vitest_1.beforeEach)(() => {
        vitest_1.vi.clearAllMocks();
    });
    (0, vitest_1.it)("should get suggestions from Google", async () => {
        stubFetchBody([
            "test",
            ["test speed", "test internet", "test microphone"],
            [],
            { google: { client: "firefox" } },
        ]);
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "google", mockOptions);
        (0, vitest_1.expect)(common.fetchWithDetection).toHaveBeenCalledWith(vitest_1.expect.stringContaining("google.com"), vitest_1.expect.any(Object));
        (0, vitest_1.expect)(result.source).toBe("google");
        (0, vitest_1.expect)(result.suggestions).toEqual(["test speed", "test internet", "test microphone"]);
        (0, vitest_1.expect)(result.suggestions.length).toBeLessThanOrEqual(mockOptions.limit);
    });
    (0, vitest_1.it)("should get suggestions from DuckDuckGo", async () => {
        stubFetchBody([{ phrase: "test speed" }, { phrase: "test internet" }, { phrase: "tester" }]);
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "duckduckgo", mockOptions);
        (0, vitest_1.expect)(common.fetchWithDetection).toHaveBeenCalledWith(vitest_1.expect.stringContaining("duckduckgo.com"), vitest_1.expect.any(Object));
        (0, vitest_1.expect)(result.source).toBe("duckduckgo");
        (0, vitest_1.expect)(result.suggestions).toEqual(["test speed", "test internet", "tester"]);
    });
    (0, vitest_1.it)("should get suggestions from Yahoo", async () => {
        stubFetchBody({
            gossip: {
                results: [{ key: "test speed" }, { key: "test internet" }],
            },
        });
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "yahoo", mockOptions);
        (0, vitest_1.expect)(result.source).toBe("yahoo");
        (0, vitest_1.expect)(result.suggestions).toEqual(twoSuggestions);
    });
    (0, vitest_1.it)("should get suggestions from Brave", async () => {
        // OpenSearch format
        stubFetchBody(["test", ["test speed", "test internet"]]);
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "brave", mockOptions);
        (0, vitest_1.expect)(result.source).toBe("brave");
        (0, vitest_1.expect)(result.suggestions).toEqual(twoSuggestions);
    });
    (0, vitest_1.it)("should get suggestions from Yandex", async () => {
        stubFetchBody(["test", ["test speed", "test internet"]]);
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "yandex", mockOptions);
        (0, vitest_1.expect)(result.source).toBe("yandex");
        (0, vitest_1.expect)(result.suggestions).toEqual(twoSuggestions);
    });
    (0, vitest_1.it)("should get suggestions from Ecosia", async () => {
        stubFetchBody({
            suggestions: ["test speed", "test internet"],
        });
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "ecosia", mockOptions);
        (0, vitest_1.expect)(result.source).toBe("ecosia");
        (0, vitest_1.expect)(result.suggestions).toEqual(twoSuggestions);
    });
    (0, vitest_1.it)("should get suggestions from Startpage", async () => {
        stubFetchBody({
            suggestions: [{ text: "test speed" }, { text: "test internet" }],
        });
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "startpage", mockOptions);
        (0, vitest_1.expect)(result.source).toBe("startpage");
        (0, vitest_1.expect)(result.suggestions).toEqual(twoSuggestions);
    });
    (0, vitest_1.it)("should get suggestions from Qwant", async () => {
        stubFetchBody({
            data: {
                items: [{ value: "test speed" }, { value: "test internet" }],
            },
        });
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "qwant", mockOptions);
        (0, vitest_1.expect)(result.source).toBe("qwant");
        (0, vitest_1.expect)(result.suggestions).toEqual(twoSuggestions);
    });
    (0, vitest_1.it)("should get suggestions from Swisscows", async () => {
        stubFetchBody(["test speed", "test internet"]);
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "swisscows", mockOptions);
        (0, vitest_1.expect)(result.source).toBe("swisscows");
        (0, vitest_1.expect)(result.suggestions).toEqual(twoSuggestions);
    });
    (0, vitest_1.it)("should fallback to DuckDuckGo when no provider specified", async () => {
        stubFetchBody([{ phrase: "test fallback" }]);
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery);
        (0, vitest_1.expect)(result.source).toBe("duckduckgo");
        (0, vitest_1.expect)(result.suggestions).toEqual(["test fallback"]);
    });
    (0, vitest_1.it)("should handle errors gracefully", async () => {
        vitest_1.vi.spyOn(common, "fetchWithDetection").mockRejectedValue(new Error("Network Error"));
        const pending = (0, autocomplete_1.getSuggestions)(mockQuery, "google");
        await (0, vitest_1.expect)(pending).rejects.toThrow("Failed to get suggestions from google");
    });
    (0, vitest_1.it)("should respect limit option", async () => {
        stubFetchBody(["test", ["1", "2", "3", "4", "5", "6"]]);
        const result = await (0, autocomplete_1.getSuggestions)(mockQuery, "google", { limit: 3 });
        (0, vitest_1.expect)(result.suggestions).toHaveLength(3);
        (0, vitest_1.expect)(result.suggestions).toEqual(["1", "2", "3"]);
    });
});
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { ProxyConfig, ScraperOptions } from "../types";
|
|
2
|
+
import type { Browser } from "puppeteer";
|
|
3
|
+
/**
 * Substring patterns, grouped by vendor, that detectBotProtection searches
 * for (case-insensitively) in response headers and bodies.
 */
export declare const BOT_PROTECTION_PATTERNS: {
|
|
4
|
+
cloudflare: string[];
|
|
5
|
+
perimeterx: string[];
|
|
6
|
+
akamai: string[];
|
|
7
|
+
datadome: string[];
|
|
8
|
+
generic: string[];
|
|
9
|
+
};
|
|
10
|
+
/**
 * Returns true when any BOT_PROTECTION_PATTERNS entry occurs, ignoring case,
 * in a "key: value" header line or in the body; false otherwise.
 * `headers` may be any object with an entries() method (Headers, Map) or a
 * plain object; falsy headers or body are skipped.
 */
export declare function detectBotProtection(headers: Headers | Map<string, string> | Record<string, string> | object | undefined, body: string): boolean;
|
|
11
|
+
/**
 * Normalizes a proxy setting into `{ url, type }`.
 * Returns null when no proxy is given. A string is validated with `new URL`
 * and returned unchanged, with `type` set to its protocol minus the colon;
 * an unparsable string throws Error("Invalid proxy URL format"). A config
 * object is assembled into `type://[username:password@]host:port`.
 */
export declare function parseProxyConfig(proxy?: ProxyConfig | string): {
|
|
12
|
+
url: string;
|
|
13
|
+
type: string;
|
|
14
|
+
} | null;
|
|
15
|
+
/**
 * Builds a fixed set of browser-like request headers. The User-Agent is
 * picked at random from a small built-in list on every call.
 */
export declare function createRealisticHeaders(): Record<string, string>;
|
|
16
|
+
/**
 * Launches a headless puppeteer-extra browser (the stealth plugin is
 * registered when the module loads). When `proxy` is given, its `url` is
 * passed to the browser as `--proxy-server`.
 */
export declare function createStealthBrowser(proxy?: {
|
|
17
|
+
url: string;
|
|
18
|
+
type: string;
|
|
19
|
+
}): Promise<Browser>;
|
|
20
|
+
/**
 * Fetches `url` with headers from createRealisticHeaders and resolves to the
 * response headers and text body. Reads `options.proxy` and `options.timeout`
 * (10000 when unset).
 * Rejects with Error("Bot protection detected") when detectBotProtection
 * matches the response. Proxy failures are rejected as plain objects carrying
 * `message`, `code` ("PROXY_CONNECTION_FAILED", "PROXY_AUTH_FAILED" or
 * "PROXY_CONNECTION_REFUSED") and `originalError`.
 */
export declare function fetchWithDetection(url: string, options: ScraperOptions): Promise<{
|
|
21
|
+
headers: Headers;
|
|
22
|
+
body: string;
|
|
23
|
+
}>;
|
|
24
|
+
/** Sends a HEAD request to `url`; resolves to `response.ok`, or false if the request throws. */
export declare function isUrlAccessible(url: string): Promise<boolean>;
|
|
25
|
+
/**
 * Collapses every whitespace run to a single space, then replaces the
 * whitespace after each `.`, `!` or `?` with a blank line, and trims the
 * result. Returns "" for empty input.
 */
export declare function cleanText(text: string): string;
|
|
26
|
+
/** Builds a cache key of the form `<query>-<JSON.stringify(options)>`. */
export declare function getCacheKey(query: string, options: ScraperOptions): string;
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted helper for default imports: ES modules pass through
// unchanged; any other value is wrapped as { default: value }.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.BOT_PROTECTION_PATTERNS = void 0;
|
|
7
|
+
exports.detectBotProtection = detectBotProtection;
|
|
8
|
+
exports.parseProxyConfig = parseProxyConfig;
|
|
9
|
+
exports.createRealisticHeaders = createRealisticHeaders;
|
|
10
|
+
exports.createStealthBrowser = createStealthBrowser;
|
|
11
|
+
exports.fetchWithDetection = fetchWithDetection;
|
|
12
|
+
exports.isUrlAccessible = isUrlAccessible;
|
|
13
|
+
exports.cleanText = cleanText;
|
|
14
|
+
exports.getCacheKey = getCacheKey;
|
|
15
|
+
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
|
|
16
|
+
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
|
|
17
|
+
// Use stealth plugin
|
|
18
|
+
puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());
|
|
19
|
+
/**
 * Type guard for objects with an entries method (Headers, Map, etc.).
 *
 * @param value Anything
 * @returns true when `value` is a non-null object exposing a callable `entries`
 */
function hasEntries(value) {
    if (typeof value !== "object" || value === null) {
        return false;
    }
    if (!("entries" in value)) {
        return false;
    }
    return typeof value.entries === "function";
}
|
|
26
|
+
// Bot detection patterns
|
|
27
|
+
exports.BOT_PROTECTION_PATTERNS = {
|
|
28
|
+
cloudflare: [
|
|
29
|
+
"cf-ray",
|
|
30
|
+
"__cf_bm",
|
|
31
|
+
"cloudflare",
|
|
32
|
+
"challenge-platform",
|
|
33
|
+
"Just a moment...",
|
|
34
|
+
"Checking your browser",
|
|
35
|
+
"DDoS protection by Cloudflare",
|
|
36
|
+
],
|
|
37
|
+
perimeterx: ["_px", "perimeterx", "px-captcha", "PX", "bot-management"],
|
|
38
|
+
akamai: ["akamai", "ak_bmsc", "akamaighost", "akamaized", "edgekey"],
|
|
39
|
+
datadome: ["datadome", "__ddg_", "x-datadome", "ddg-", "bot-detection"],
|
|
40
|
+
generic: [
|
|
41
|
+
"captcha",
|
|
42
|
+
"recaptcha",
|
|
43
|
+
"hcaptcha",
|
|
44
|
+
"access denied",
|
|
45
|
+
"403 forbidden",
|
|
46
|
+
"rate limit",
|
|
47
|
+
"too many requests",
|
|
48
|
+
"blocked",
|
|
49
|
+
"security check",
|
|
50
|
+
"unauthorized",
|
|
51
|
+
],
|
|
52
|
+
};
|
|
53
|
+
/**
 * Helper function to detect bot protection.
 *
 * Returns true when any pattern from BOT_PROTECTION_PATTERNS occurs, ignoring
 * case, in a "key: value" header line or in the body.
 *
 * @param headers Any object with an entries() method (Headers, Map) or a plain
 *   object of header names to values; falsy or non-object values are ignored
 * @param body Response body; falsy values are ignored, non-strings are
 *   stringified before matching
 * @returns true if a pattern matched, false otherwise
 */
function detectBotProtection(headers, body) {
    // Flatten and lower-case the pattern groups once per call rather than once
    // per header. They are read at call time so changes to the exported table
    // still take effect.
    const needles = Object.values(exports.BOT_PROTECTION_PATTERNS)
        .flat()
        .map((pattern) => pattern.toLowerCase());
    const containsPattern = (text) => {
        const haystack = text.toLowerCase();
        return needles.some((needle) => haystack.includes(needle));
    };
    if (headers) {
        let pairs = [];
        if (hasEntries(headers)) {
            // Headers object or Map
            pairs = headers.entries();
        }
        else if (typeof headers === "object") {
            // Plain object
            pairs = Object.entries(headers);
        }
        for (const [key, value] of pairs) {
            if (containsPattern(`${key}: ${value}`)) {
                return true;
            }
        }
    }
    if (!body) {
        return false;
    }
    // A non-string body (e.g. a number) would have no toLowerCase method.
    return containsPattern(typeof body === "string" ? body : String(body));
}
|
|
97
|
+
/**
 * Parse proxy configuration into `{ url, type }`.
 *
 * @param proxy A proxy URL string, a config object ({ type, host, port, auth? }),
 *   or nothing
 * @returns null when no proxy is given; otherwise the proxy URL and its type.
 *   For a string the URL is returned unchanged and `type` is its protocol
 *   without the trailing colon.
 * @throws {Error} "Invalid proxy URL format" when a string cannot be parsed as a URL
 */
function parseProxyConfig(proxy) {
    if (!proxy) {
        return null;
    }
    if (typeof proxy === "string") {
        let parsed;
        try {
            parsed = new URL(proxy);
        }
        catch (cause) {
            // Keep the parser's own error reachable for debugging.
            throw new Error("Invalid proxy URL format", { cause });
        }
        return {
            url: proxy,
            type: parsed.protocol.replace(":", ""),
        };
    }
    // Build proxy URL from config. Credentials are percent-encoded so that
    // characters such as "@", ":" or "/" cannot be mistaken for URL delimiters.
    const { type, host, port, auth } = proxy;
    const credentials = auth
        ? `${encodeURIComponent(auth.username)}:${encodeURIComponent(auth.password)}@`
        : "";
    return {
        url: `${type}://${credentials}${host}:${port}`,
        type,
    };
}
|
|
122
|
+
/**
 * Create realistic headers for basic requests.
 *
 * @returns A fresh header object; the User-Agent is chosen at random from a
 *   fixed list of three desktop browser strings on every call.
 */
function createRealisticHeaders() {
    const candidates = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0",
    ];
    const pick = Math.floor(Math.random() * candidates.length);
    const headers = { "User-Agent": candidates[pick] };
    headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    headers["Accept-Language"] = "en-US,en;q=0.5";
    headers["Accept-Encoding"] = "gzip, deflate, br";
    headers["DNT"] = "1";
    headers["Connection"] = "keep-alive";
    headers["Upgrade-Insecure-Requests"] = "1";
    headers["Sec-Fetch-Dest"] = "document";
    headers["Sec-Fetch-Mode"] = "navigate";
    headers["Sec-Fetch-Site"] = "none";
    headers["Cache-Control"] = "max-age=0";
    return headers;
}
|
|
143
|
+
/**
 * Puppeteer stealth configuration with enhanced options.
 *
 * Launches a headless browser through puppeteer-extra and then, best effort,
 * overrides permissions for google.com and duckduckgo.com with an empty list.
 *
 * @param proxy Optional `{ url, type }`; `url` is passed as `--proxy-server`
 * @returns The launched browser
 */
async function createStealthBrowser(proxy) {
    const chromeFlags = [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-accelerated-2d-canvas",
        "--no-first-run",
        "--no-zygote",
        "--single-process",
        "--disable-gpu",
        "--disable-web-security",
        "--disable-features=VizDisplayCompositor",
        "--ignore-certificate-errors",
        "--ignore-certificate-errors-spki-list",
    ];
    if (proxy) {
        chromeFlags.push(`--proxy-server=${proxy.url}`);
    }
    const browser = await puppeteer_extra_1.default.launch({ headless: true, args: chromeFlags });
    // Additional stealth measures
    try {
        const openPages = await browser.pages();
        const hadPage = openPages.length > 0;
        const page = hadPage ? openPages[0] : await browser.newPage();
        const context = page.browserContext();
        for (const origin of ["https://www.google.com", "https://duckduckgo.com"]) {
            await context.overridePermissions(origin, []);
        }
        // Only close the page if it was opened here for this purpose.
        if (!hadPage) {
            await page.close();
        }
    }
    catch {
        // Ignore permissions errors if context doesn't support it
    }
    return browser;
}
|
|
181
|
+
/**
 * Fetch with bot detection.
 *
 * Requests `url` with browser-like headers, reads the body as text and
 * rejects when detectBotProtection matches the response.
 *
 * @param url URL to request
 * @param options Reads `proxy` (string or config object) and `timeout`
 *   (milliseconds, 10000 when unset)
 * @returns `{ headers, body }` of the response
 * @throws Error("Bot protection detected") when the response matches a pattern;
 *   a plain object `{ message, code, originalError }` with code
 *   "PROXY_CONNECTION_FAILED", "PROXY_AUTH_FAILED" or
 *   "PROXY_CONNECTION_REFUSED" for proxy/connection failures; any other fetch
 *   error is rethrown unchanged.
 */
async function fetchWithDetection(url, options) {
    const proxy = parseProxyConfig(options.proxy);
    const headers = createRealisticHeaders();
    const timeoutMs = options.timeout || 10000;
    const fetchOptions = {
        headers,
        // Kept for fetch implementations that accept a `timeout` option.
        timeout: timeoutMs,
    };
    // `timeout` is not part of the standard fetch options, so an abort signal
    // is what actually enforces the limit there.
    if (typeof AbortSignal !== "undefined" && typeof AbortSignal.timeout === "function") {
        fetchOptions.signal = AbortSignal.timeout(timeoutMs);
    }
    if (proxy) {
        try {
            let agent;
            if (proxy.type === "socks4" || proxy.type === "socks5") {
                const { SocksProxyAgent } = await import("socks-proxy-agent");
                agent = new SocksProxyAgent(proxy.url);
            }
            else {
                const { HttpsProxyAgent } = await import("https-proxy-agent");
                agent = new HttpsProxyAgent(proxy.url);
            }
            // NOTE(review): `agent` is an http/node-fetch style option; confirm the
            // fetch in scope honours it, otherwise the proxy is silently unused.
            fetchOptions.agent = agent;
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            throw {
                message: `Proxy connection failed: ${errorMessage}`,
                code: "PROXY_CONNECTION_FAILED",
                originalError: error,
            };
        }
    }
    try {
        const response = await fetch(url, fetchOptions);
        const body = await response.text();
        if (detectBotProtection(response.headers, body)) {
            throw new Error("Bot protection detected");
        }
        return {
            headers: response.headers,
            body,
        };
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        // Some fetch implementations report only a generic message and put the
        // network error (message / code) on `error.cause`; inspect both.
        const cause = error instanceof Error ? error.cause : undefined;
        const details = [errorMessage, cause?.message, cause?.code].filter(Boolean).join(" ");
        if (details.includes("407") || details.includes("authentication")) {
            throw {
                message: "Proxy authentication failed",
                code: "PROXY_AUTH_FAILED",
                originalError: error,
            };
        }
        if (details.includes("ECONNREFUSED") || details.includes("ENOTFOUND")) {
            throw {
                message: "Proxy connection refused",
                code: "PROXY_CONNECTION_REFUSED",
                originalError: error,
            };
        }
        throw error;
    }
}
|
|
241
|
+
/**
 * Check if a URL is accessible.
 *
 * @param url URL to probe with a HEAD request
 * @returns The response's `ok` flag, or false when the request throws
 */
async function isUrlAccessible(url) {
    let response;
    try {
        response = await fetch(url, { method: "HEAD" });
        return response.ok;
    }
    catch {
        return false;
    }
}
|
|
251
|
+
/**
 * Clean up text by removing excessive whitespace and making it more readable.
 *
 * Whitespace runs become a single space; the space after `.`, `!` or `?` is
 * then replaced by a blank line; the result is trimmed.
 *
 * @param text Text to clean
 * @returns The cleaned text, or "" for empty input
 */
function cleanText(text) {
    if (!text) {
        return "";
    }
    const singleSpaced = text.replace(/[\n\s\r]+/g, " ");
    const paragraphs = singleSpaced.replace(/([.!?])\s+/g, "$1\n\n");
    return paragraphs.trim();
}
|
|
260
|
+
/**
 * Helper function to get cache key.
 *
 * @param query The search query
 * @param options Options to fold into the key
 * @returns `query`, a dash, then `options` serialized with JSON.stringify
 */
function getCacheKey(query, options) {
    const serializedOptions = JSON.stringify(options);
    return `${query}-${serializedOptions}`;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|