llm-search-tools 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +244 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/dist/integration.test.d.ts +1 -0
- package/dist/integration.test.js +237 -0
- package/dist/modules/answerbox.test.d.ts +1 -0
- package/dist/modules/answerbox.test.js +105 -0
- package/dist/modules/autocomplete.d.ts +11 -0
- package/dist/modules/autocomplete.js +159 -0
- package/dist/modules/autocomplete.test.d.ts +1 -0
- package/dist/modules/autocomplete.test.js +188 -0
- package/dist/modules/common.d.ts +26 -0
- package/dist/modules/common.js +263 -0
- package/dist/modules/common.test.d.ts +1 -0
- package/dist/modules/common.test.js +87 -0
- package/dist/modules/crawl.d.ts +9 -0
- package/dist/modules/crawl.js +117 -0
- package/dist/modules/crawl.test.d.ts +1 -0
- package/dist/modules/crawl.test.js +48 -0
- package/dist/modules/events.d.ts +8 -0
- package/dist/modules/events.js +129 -0
- package/dist/modules/events.test.d.ts +1 -0
- package/dist/modules/events.test.js +104 -0
- package/dist/modules/finance.d.ts +10 -0
- package/dist/modules/finance.js +20 -0
- package/dist/modules/finance.test.d.ts +1 -0
- package/dist/modules/finance.test.js +77 -0
- package/dist/modules/flights.d.ts +8 -0
- package/dist/modules/flights.js +135 -0
- package/dist/modules/flights.test.d.ts +1 -0
- package/dist/modules/flights.test.js +128 -0
- package/dist/modules/hackernews.d.ts +8 -0
- package/dist/modules/hackernews.js +87 -0
- package/dist/modules/hackernews.js.map +1 -0
- package/dist/modules/images.test.d.ts +1 -0
- package/dist/modules/images.test.js +145 -0
- package/dist/modules/integrations.test.d.ts +1 -0
- package/dist/modules/integrations.test.js +93 -0
- package/dist/modules/media.d.ts +11 -0
- package/dist/modules/media.js +132 -0
- package/dist/modules/media.test.d.ts +1 -0
- package/dist/modules/media.test.js +186 -0
- package/dist/modules/news.d.ts +3 -0
- package/dist/modules/news.js +39 -0
- package/dist/modules/news.test.d.ts +1 -0
- package/dist/modules/news.test.js +88 -0
- package/dist/modules/parser.d.ts +19 -0
- package/dist/modules/parser.js +361 -0
- package/dist/modules/parser.test.d.ts +1 -0
- package/dist/modules/parser.test.js +151 -0
- package/dist/modules/reddit.d.ts +21 -0
- package/dist/modules/reddit.js +107 -0
- package/dist/modules/scrape.d.ts +16 -0
- package/dist/modules/scrape.js +272 -0
- package/dist/modules/scrape.test.d.ts +1 -0
- package/dist/modules/scrape.test.js +232 -0
- package/dist/modules/scraper.d.ts +12 -0
- package/dist/modules/scraper.js +640 -0
- package/dist/modules/scrapers/anidb.d.ts +8 -0
- package/dist/modules/scrapers/anidb.js +156 -0
- package/dist/modules/scrapers/duckduckgo.d.ts +6 -0
- package/dist/modules/scrapers/duckduckgo.js +284 -0
- package/dist/modules/scrapers/google-news.d.ts +2 -0
- package/dist/modules/scrapers/google-news.js +60 -0
- package/dist/modules/scrapers/google.d.ts +6 -0
- package/dist/modules/scrapers/google.js +211 -0
- package/dist/modules/scrapers/searxng.d.ts +2 -0
- package/dist/modules/scrapers/searxng.js +93 -0
- package/dist/modules/scrapers/thetvdb.d.ts +3 -0
- package/dist/modules/scrapers/thetvdb.js +147 -0
- package/dist/modules/scrapers/tmdb.d.ts +3 -0
- package/dist/modules/scrapers/tmdb.js +172 -0
- package/dist/modules/scrapers/yahoo-finance.d.ts +2 -0
- package/dist/modules/scrapers/yahoo-finance.js +33 -0
- package/dist/modules/search.d.ts +5 -0
- package/dist/modules/search.js +45 -0
- package/dist/modules/search.js.map +1 -0
- package/dist/modules/search.test.d.ts +1 -0
- package/dist/modules/search.test.js +219 -0
- package/dist/modules/urbandictionary.d.ts +12 -0
- package/dist/modules/urbandictionary.js +26 -0
- package/dist/modules/webpage.d.ts +4 -0
- package/dist/modules/webpage.js +150 -0
- package/dist/modules/webpage.js.map +1 -0
- package/dist/modules/wikipedia.d.ts +5 -0
- package/dist/modules/wikipedia.js +85 -0
- package/dist/modules/wikipedia.js.map +1 -0
- package/dist/scripts/interactive-search.d.ts +1 -0
- package/dist/scripts/interactive-search.js +98 -0
- package/dist/test.d.ts +1 -0
- package/dist/test.js +179 -0
- package/dist/test.js.map +1 -0
- package/dist/testBraveSearch.d.ts +1 -0
- package/dist/testBraveSearch.js +34 -0
- package/dist/testDuckDuckGo.d.ts +1 -0
- package/dist/testDuckDuckGo.js +52 -0
- package/dist/testEcosia.d.ts +1 -0
- package/dist/testEcosia.js +57 -0
- package/dist/testSearchModule.d.ts +1 -0
- package/dist/testSearchModule.js +95 -0
- package/dist/testwebpage.d.ts +1 -0
- package/dist/testwebpage.js +81 -0
- package/dist/types.d.ts +174 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/createTestDocx.d.ts +1 -0
- package/dist/utils/createTestDocx.js +58 -0
- package/dist/utils/htmlcleaner.d.ts +20 -0
- package/dist/utils/htmlcleaner.js +172 -0
- package/docs/README.md +275 -0
- package/docs/autocomplete.md +73 -0
- package/docs/crawling.md +88 -0
- package/docs/events.md +58 -0
- package/docs/examples.md +158 -0
- package/docs/finance.md +60 -0
- package/docs/flights.md +71 -0
- package/docs/hackernews.md +121 -0
- package/docs/media.md +87 -0
- package/docs/news.md +75 -0
- package/docs/parser.md +197 -0
- package/docs/scraper.md +347 -0
- package/docs/search.md +106 -0
- package/docs/wikipedia.md +91 -0
- package/package.json +97 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const common_1 = require("./common");
|
|
5
|
+
// Unit tests for the helpers exported from ./common:
// cleanText, parseProxyConfig, detectBotProtection and isUrlAccessible.
(0, vitest_1.describe)("Common Utilities", () => {
    (0, vitest_1.describe)("cleanText", () => {
        (0, vitest_1.it)("should remove excessive whitespace", () => {
            const input = " Hello world \n\n how are you ";
            (0, vitest_1.expect)((0, common_1.cleanText)(input)).toBe("Hello world how are you");
        });
        (0, vitest_1.it)("should format sentences with newlines", () => {
            const input = "Hello world. How are you? I am fine!";
            const expected = "Hello world.\n\nHow are you?\n\nI am fine!";
            (0, vitest_1.expect)((0, common_1.cleanText)(input)).toBe(expected);
        });
        (0, vitest_1.it)("should handle empty strings", () => {
            (0, vitest_1.expect)((0, common_1.cleanText)("")).toBe("");
        });
    });
    (0, vitest_1.describe)("parseProxyConfig", () => {
        (0, vitest_1.it)("should return null for undefined", () => {
            (0, vitest_1.expect)((0, common_1.parseProxyConfig)(undefined)).toBeNull();
        });
        (0, vitest_1.it)("should parse string proxy URL", () => {
            const proxy = "http://user:pass@host:8080";
            const result = (0, common_1.parseProxyConfig)(proxy);
            (0, vitest_1.expect)(result).toEqual({
                url: "http://user:pass@host:8080",
                type: "http",
            });
        });
        (0, vitest_1.it)("should parse object proxy config", () => {
            const proxy = {
                type: "socks5",
                host: "localhost",
                port: 9050,
                auth: {
                    username: "user",
                    password: "pass",
                },
            };
            const result = (0, common_1.parseProxyConfig)(proxy);
            (0, vitest_1.expect)(result).toEqual({
                url: "socks5://user:pass@localhost:9050",
                type: "socks5",
            });
        });
        (0, vitest_1.it)("should throw error for invalid proxy string", () => {
            (0, vitest_1.expect)(() => (0, common_1.parseProxyConfig)("not-a-url")).toThrow("Invalid proxy URL format");
        });
    });
    (0, vitest_1.describe)("detectBotProtection", () => {
        (0, vitest_1.it)("should return false for normal content", () => {
            (0, vitest_1.expect)((0, common_1.detectBotProtection)({}, "<html><body>Normal content</body></html>")).toBe(false);
        });
        (0, vitest_1.it)("should detect Cloudflare", () => {
            // Detection is exercised through both the body text and the response headers.
            (0, vitest_1.expect)((0, common_1.detectBotProtection)({}, "Just a moment...")).toBe(true);
            (0, vitest_1.expect)((0, common_1.detectBotProtection)({ "cf-ray": "123" }, "")).toBe(true);
        });
        (0, vitest_1.it)("should detect 403 forbidden", () => {
            (0, vitest_1.expect)((0, common_1.detectBotProtection)({}, "403 Forbidden")).toBe(true);
        });
    });
    (0, vitest_1.describe)("isUrlAccessible", () => {
        // Remember the real implementation so the stub installed below never
        // leaks into suites that run after this one in the same worker.
        const originalFetch = global.fetch;
        (0, vitest_1.beforeEach)(() => {
            global.fetch = vitest_1.vi.fn();
        });
        (0, vitest_1.afterEach)(() => {
            vitest_1.vi.resetAllMocks();
            global.fetch = originalFetch;
        });
        (0, vitest_1.it)("should return true if fetch succeeds", async () => {
            global.fetch.mockResolvedValue({ ok: true });
            const result = await (0, common_1.isUrlAccessible)("https://example.com");
            (0, vitest_1.expect)(result).toBe(true);
        });
        (0, vitest_1.it)("should return false if fetch fails", async () => {
            global.fetch.mockRejectedValue(new Error("Network error"));
            const result = await (0, common_1.isUrlAccessible)("https://example.com");
            (0, vitest_1.expect)(result).toBe(false);
        });
        (0, vitest_1.it)("should return false if response is not ok", async () => {
            global.fetch.mockResolvedValue({ ok: false });
            const result = await (0, common_1.isUrlAccessible)("https://example.com");
            (0, vitest_1.expect)(result).toBe(false);
        });
    });
});
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { CrawlOptions, CrawlResult } from "../types";
|
|
2
|
+
/**
|
|
3
|
+
* Crawl a website starting from a given URL.
|
|
4
|
+
*
|
|
5
|
+
* @param startUrl The URL to start crawling from
|
|
6
|
+
* @param options Crawling options
|
|
7
|
+
* @returns Promise<CrawlResult> Array of crawled pages
|
|
8
|
+
* @throws The implementation rejects with a plain object `{ message, code: "CRAWL_ERROR", originalError }` when the crawler run fails.
*/
|
|
9
|
+
export declare function crawl(startUrl: string, options?: CrawlOptions): Promise<CrawlResult>;
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Interop helper for default imports (looks like TypeScript's emitted `__importDefault`).
// Reuses an existing `this.__importDefault` when one is present; otherwise returns modules
// flagged `__esModule` unchanged and wraps any other value as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.crawl = crawl;
|
|
7
|
+
const crawlee_1 = require("crawlee");
|
|
8
|
+
const scrape_1 = require("./scrape");
|
|
9
|
+
const common_1 = require("./common");
|
|
10
|
+
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
|
|
11
|
+
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
|
|
12
|
+
// Ensure stealth plugin is used
|
|
13
|
+
puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());
|
|
14
|
+
/**
 * Crawl a website starting from a given URL.
 *
 * Pages are fetched with Crawlee's CheerioCrawler by default, or with a
 * PuppeteerCrawler (launched through puppeteer-extra) when
 * `options.crawlType === "puppeteer"` or `options.forcePuppeteer` is set.
 * Each fetched page is passed through `normalizeContent` and collected
 * together with its URL and link depth.
 *
 * @param startUrl The URL to start crawling from
 * @param options Crawling options:
 *   - `maxPages` / `limit`: upper bound on requests per crawl (default 10)
 *   - `maxDepth`: link hops to follow from `startUrl` (default 2; 0 = start page only)
 *   - `stayOnDomain`: only follow same-domain links unless explicitly `false`
 *   - `proxy`: proxy string/object understood by `parseProxyConfig`
 * @returns Promise<CrawlResult> Array of crawled pages
 * @throws Whatever `parseProxyConfig` throws for an invalid proxy, or a plain
 *   object `{ message, code: "CRAWL_ERROR", originalError }` if the crawl fails
 */
async function crawl(startUrl, options = {}) {
    const results = [];
    const maxPages = options.maxPages || options.limit || 10;
    // `??` rather than `||`: an explicit maxDepth of 0 means "do not follow links".
    const maxDepth = options.maxDepth ?? 2;
    const usePuppeteer = options.crawlType === "puppeteer" || options.forcePuppeteer;
    const stayOnDomain = options.stayOnDomain !== false; // Default to true if not specified
    // Per-crawl configuration: keep Crawlee from persisting state to the filesystem.
    const config = new crawlee_1.Configuration({
        persistStorage: false,
        purgeOnStart: true,
        availableMemoryRatio: 0.8,
    });
    // Shared handler for both crawler flavours; failures on a single page are
    // logged and skipped so one bad page does not abort the whole crawl.
    const requestHandler = async (context) => {
        const { request, enqueueLinks, log } = context;
        try {
            const url = request.loadedUrl || request.url;
            const depth = request.userData.depth || 0;
            log.debug(`Processing ${url} at depth ${depth}`);
            // A `page` property is only present on Puppeteer contexts; otherwise use the raw body.
            const html = "page" in context
                ? await context.page.content()
                : context.body.toString();
            const content = (0, scrape_1.normalizeContent)({
                url,
                html,
                skipReadability: false, // We want readable content
            });
            results.push({ ...content, url, depth });
            // The page budget (maxPages) is enforced by maxRequestsPerCrawl below;
            // here only the depth limit is checked.
            if (depth < maxDepth) {
                await enqueueLinks({
                    strategy: stayOnDomain ? "same-domain" : "all",
                    userData: { depth: depth + 1 },
                });
            }
        }
        catch (error) {
            log.error(`Failed to process ${request.url}: ${error}`);
        }
    };
    // Validate the proxy up front (invalid input throws before any crawling starts).
    const proxy = options.proxy ? (0, common_1.parseProxyConfig)(options.proxy) : null;
    try {
        const sharedOptions = {
            requestHandler,
            maxRequestsPerCrawl: maxPages,
        };
        if (proxy) {
            // Route requests through the requested proxy instead of silently
            // ignoring the option and connecting directly.
            sharedOptions.proxyConfiguration = new crawlee_1.ProxyConfiguration({
                proxyUrls: [proxy.url],
            });
        }
        const crawler = usePuppeteer
            ? new crawlee_1.PuppeteerCrawler({
                ...sharedOptions,
                launchContext: {
                    launcher: puppeteer_extra_1.default,
                    launchOptions: {
                        headless: true,
                        args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"],
                    },
                },
            }, config)
            : new crawlee_1.CheerioCrawler({
                ...sharedOptions,
                additionalMimeTypes: ["text/html", "application/xhtml+xml"],
            }, config);
        await crawler.run([startUrl]);
        return results;
    }
    catch (error) {
        throw {
            // Guard against non-Error throwables (null, strings, ...).
            message: `Crawling failed: ${error?.message ?? String(error)}`,
            code: "CRAWL_ERROR",
            originalError: error,
        };
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const crawl_1 = require("./crawl");
|
|
5
|
+
const crawlee_1 = require("crawlee");
|
|
6
|
+
// Mock crawlee
|
|
7
|
+
vitest_1.vi.mock("crawlee", async () => {
|
|
8
|
+
const actual = await vitest_1.vi.importActual("crawlee");
|
|
9
|
+
return {
|
|
10
|
+
...actual,
|
|
11
|
+
CheerioCrawler: vitest_1.vi.fn(),
|
|
12
|
+
PuppeteerCrawler: vitest_1.vi.fn(),
|
|
13
|
+
Configuration: vitest_1.vi.fn(),
|
|
14
|
+
};
|
|
15
|
+
});
|
|
16
|
+
// Checks which (mocked) Crawlee crawler class crawl() instantiates and with what options.
(0, vitest_1.describe)("Crawl Module", () => {
    const { CheerioCrawler, PuppeteerCrawler } = crawlee_1;
    // Stand-in crawler whose run() resolves immediately without fetching anything.
    const fakeCrawler = () => ({
        run: vitest_1.vi.fn().mockResolvedValue(undefined),
    });
    (0, vitest_1.beforeEach)(() => {
        vitest_1.vi.clearAllMocks();
        CheerioCrawler.mockImplementation(() => fakeCrawler());
        PuppeteerCrawler.mockImplementation(() => fakeCrawler());
    });
    (0, vitest_1.it)("should use CheerioCrawler by default", async () => {
        const pages = await (0, crawl_1.crawl)("https://example.com");
        (0, vitest_1.expect)(CheerioCrawler).toHaveBeenCalled();
        (0, vitest_1.expect)(PuppeteerCrawler).not.toHaveBeenCalled();
        // run() is mocked, so the request handler never fires and nothing is collected.
        (0, vitest_1.expect)(pages).toEqual([]);
    });
    (0, vitest_1.it)("should use PuppeteerCrawler when requested", async () => {
        const opts = { crawlType: "puppeteer" };
        await (0, crawl_1.crawl)("https://example.com", opts);
        (0, vitest_1.expect)(PuppeteerCrawler).toHaveBeenCalled();
        (0, vitest_1.expect)(CheerioCrawler).not.toHaveBeenCalled();
    });
    (0, vitest_1.it)("should use PuppeteerCrawler when forcePuppeteer is true", async () => {
        const opts = { forcePuppeteer: true };
        await (0, crawl_1.crawl)("https://example.com", opts);
        (0, vitest_1.expect)(PuppeteerCrawler).toHaveBeenCalled();
    });
    (0, vitest_1.it)("should configure crawler with maxPages", async () => {
        await (0, crawl_1.crawl)("https://example.com", { maxPages: 5 });
        const expectedOptions = vitest_1.expect.objectContaining({
            maxRequestsPerCrawl: 5,
        });
        (0, vitest_1.expect)(CheerioCrawler).toHaveBeenCalledWith(expectedOptions, vitest_1.expect.any(Object));
    });
});
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { EventResult, EventSearchOptions } from "../types";
|
|
2
|
+
/**
|
|
3
|
+
* Search for events using Google Events
|
|
4
|
+
* @param query Search query (e.g. "concerts in New York")
|
|
5
|
+
* @param options Search options
|
|
6
|
+
* @returns Promise<EventResult>
|
|
7
|
+
* @throws The implementation rejects with a plain object `{ message, code: "EVENT_SEARCH_ERROR", originalError }` when scraping fails.
*/
|
|
8
|
+
export declare function searchEvents(query: string, options?: EventSearchOptions): Promise<EventResult>;
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.searchEvents = searchEvents;
|
|
4
|
+
const common_1 = require("./common");
|
|
5
|
+
/**
 * Search for events using Google Events.
 *
 * Opens the Google results page in the events UI with a stealth browser,
 * dismisses the cookie-consent dialog when one appears, and heuristically
 * extracts title, date, location, link and image from each `ul li` item.
 *
 * @param query Search query (e.g. "concerts in New York")
 * @param options Search options (`proxy`, `timeout` in ms — default 30000,
 *   `limit` — default 10). `options.date` is currently not applied to the
 *   request; include the date in `query` instead.
 * @returns Promise<EventResult> `{ events, url, source: "google-events" }`
 * @throws A plain object `{ message, code: "EVENT_SEARCH_ERROR", originalError }`
 *   if navigation or extraction fails
 */
async function searchEvents(query, options = {}) {
    // `ibp=htl;events` switches the standard results page to the events UI.
    const url = `https://www.google.com/search?q=${encodeURIComponent(query)}&ibp=htl;events`;
    const proxy = (0, common_1.parseProxyConfig)(options.proxy);
    const browser = await (0, common_1.createStealthBrowser)(proxy || undefined);
    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1920, height: 1080 });
        await page.setExtraHTTPHeaders((0, common_1.createRealisticHeaders)());
        // Navigate to Google Events
        await page.goto(url, { waitUntil: "networkidle2", timeout: options.timeout || 30000 });
        // Best effort: accept the cookie-consent dialog if it shows up.
        try {
            const consentButton = await page.waitForSelector('button[aria-label="Accept all"]', { timeout: 5000 });
            if (consentButton) {
                await consentButton.click();
                await page.waitForNavigation({ waitUntil: "networkidle2" }).catch(() => { });
            }
        }
        catch {
            // No consent button found, proceed
        }
        // Best effort: give the list a chance to render; extraction below copes with an empty page.
        try {
            await page.waitForSelector("ul li", { timeout: 10000 });
        }
        catch {
            // Nothing rendered in time; fall through and extract whatever is there.
        }
        // Runs inside the page, so it must be fully self-contained.
        const events = await page.evaluate(() => {
            // Whole-word month / weekday / relative-day names. Word boundaries keep
            // ordinary words such as "Wedding", "Market" or "Sunset" from being
            // mistaken for a date line.
            const dateHint = /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?|Today|Tomorrow|Mon(?:day)?|Tue(?:s(?:day)?)?|Wed(?:nesday)?|Thu(?:r(?:s(?:day)?)?)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)\b/i;
            const results = [];
            document.querySelectorAll("ul li").forEach((item) => {
                try {
                    const text = item.innerText;
                    if (!text || text.length < 10)
                        return;
                    // Split the visible text into non-empty lines to guess the structure.
                    const lines = text.split("\n").filter((l) => l.trim().length > 0);
                    if (lines.length < 3)
                        return;
                    // Title: prefer a heading element, otherwise the first text line.
                    const titleEl = item.querySelector('[role="heading"], [aria-level]');
                    const title = titleEl?.textContent?.trim() || lines[0] || "Unknown Event";
                    // Date: first line that mentions a month, weekday or relative day.
                    const date = lines.find((l) => dateHint.test(l)) || "Upcoming";
                    // Location: first reasonably long line that is neither title nor date.
                    const location = lines.find((l) => l !== title && l !== date && l.length > 5) || "Unknown Location";
                    results.push({
                        title,
                        date,
                        location,
                        link: item.querySelector("a")?.href,
                        description: text, // Store full text as description for now
                        image: item.querySelector("img")?.src,
                    });
                }
                catch {
                    // Skip items that cannot be parsed
                }
            });
            return results;
        });
        // Drop duplicates (same title and date), keeping the first occurrence.
        const seen = new Set();
        const uniqueEvents = events.filter((event) => {
            const key = JSON.stringify([event.title, event.date]);
            if (seen.has(key))
                return false;
            seen.add(key);
            return true;
        });
        return {
            events: uniqueEvents.slice(0, options.limit || 10),
            url,
            source: "google-events",
        };
    }
    catch (error) {
        throw {
            // Guard against non-Error throwables (null, strings, ...).
            message: `Failed to search events: ${error?.message ?? String(error)}`,
            code: "EVENT_SEARCH_ERROR",
            originalError: error,
        };
    }
    finally {
        await browser.close();
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Re-export helper (looks like TypeScript's emitted `__createBinding`): exposes `m[k]` on `o`
// under the name `k2` (defaulting to `k`). Where `Object.create` exists it defines a property,
// substituting a live getter when the source descriptor is missing or not safe to copy as-is;
// otherwise it falls back to a plain assignment.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Interop helper: stores `v` as the `default` member of namespace object `o`, as an
// enumerable data property where `Object.create` exists and by plain assignment otherwise.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
// Namespace-import helper (`import * as x`): modules flagged `__esModule` are returned
// unchanged; for anything else a fresh object is built that re-exports every own key
// except "default" via __createBinding and then receives the module itself as `default`.
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames or a for-in fallback.
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
const vitest_1 = require("vitest");
|
|
37
|
+
const events_1 = require("./events");
|
|
38
|
+
const common = __importStar(require("./common"));
|
|
39
|
+
// Mock common module
|
|
40
|
+
vitest_1.vi.mock("./common", async () => {
|
|
41
|
+
const actual = await vitest_1.vi.importActual("./common");
|
|
42
|
+
return {
|
|
43
|
+
...actual,
|
|
44
|
+
createStealthBrowser: vitest_1.vi.fn(),
|
|
45
|
+
};
|
|
46
|
+
});
|
|
47
|
+
// Tests for searchEvents(). The stealth browser from ./common is replaced by
// hand-rolled page/browser doubles, so no real browser is launched.
(0, vitest_1.describe)("Events Module", () => {
    let mockPage;
    let mockBrowser;
    (0, vitest_1.beforeEach)(() => {
        vitest_1.vi.clearAllMocks();
        // Fresh doubles per test so call counts and implementations never carry over.
        mockPage = {
            setViewport: vitest_1.vi.fn(),
            setExtraHTTPHeaders: vitest_1.vi.fn(),
            goto: vitest_1.vi.fn(),
            waitForSelector: vitest_1.vi.fn(),
            evaluate: vitest_1.vi.fn(),
            click: vitest_1.vi.fn(),
            waitForNavigation: vitest_1.vi.fn(),
            $: vitest_1.vi.fn(),
        };
        mockBrowser = {
            newPage: vitest_1.vi.fn().mockResolvedValue(mockPage),
            close: vitest_1.vi.fn(),
        };
        vitest_1.vi.spyOn(common, "createStealthBrowser").mockResolvedValue(mockBrowser);
    });
    (0, vitest_1.it)("should search for events with query string", async () => {
        const mockEvents = [
            {
                title: "Test Event",
                date: "Tomorrow",
                location: "Test Location",
                link: "https://example.com/event",
                description: "A test event description",
                image: "https://example.com/image.jpg",
            },
        ];
        mockPage.evaluate.mockResolvedValue(mockEvents);
        const result = await (0, events_1.searchEvents)("concerts in New York");
        (0, vitest_1.expect)(common.createStealthBrowser).toHaveBeenCalled();
        (0, vitest_1.expect)(mockPage.goto).toHaveBeenCalledWith(vitest_1.expect.stringContaining("google.com/search?q=concerts%20in%20New%20York&ibp=htl;events"), vitest_1.expect.any(Object));
        (0, vitest_1.expect)(result.events).toEqual(mockEvents);
        (0, vitest_1.expect)(result.source).toBe("google-events");
    });
    (0, vitest_1.it)("should handle scraping errors gracefully", async () => {
        mockPage.goto.mockRejectedValue(new Error("Navigation failed"));
        await (0, vitest_1.expect)((0, events_1.searchEvents)("query")).rejects.toThrow("Failed to search events");
        // The browser must be closed even when navigation fails.
        (0, vitest_1.expect)(mockBrowser.close).toHaveBeenCalled();
    });
    (0, vitest_1.it)("should attempt to click consent button if found", async () => {
        // Keep a handle on the button's click spy so the click itself can be asserted.
        const consentClick = vitest_1.vi.fn();
        mockPage.waitForSelector.mockImplementation((selector) => {
            if (selector.includes("Accept all")) {
                return Promise.resolve({ click: consentClick });
            }
            return Promise.resolve(null);
        });
        // Mock evaluate to return empty list so it finishes
        mockPage.evaluate.mockResolvedValue([]);
        await (0, events_1.searchEvents)("query");
        (0, vitest_1.expect)(mockPage.goto).toHaveBeenCalled();
        (0, vitest_1.expect)(consentClick).toHaveBeenCalledTimes(1);
    });
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { FinanceResult } from "../types";
|
|
2
|
+
export { getStockQuote } from "./scrapers/yahoo-finance";
|
|
3
|
+
/**
|
|
4
|
+
* Get a stock quote for a given symbol.
|
|
5
|
+
* Currently uses Yahoo Finance as the primary source.
|
|
6
|
+
*
|
|
7
|
+
* @param symbol The stock symbol (e.g., "AAPL", "GOOGL")
|
|
8
|
+
* @returns Promise<FinanceResult>
|
|
9
|
+
* @throws The implementation delegates to `getStockQuote`; a rejection from that call propagates to the caller unchanged.
*/
|
|
10
|
+
export declare function getQuote(symbol: string): Promise<FinanceResult>;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getStockQuote = void 0;
|
|
4
|
+
exports.getQuote = getQuote;
|
|
5
|
+
const yahoo_finance_1 = require("./scrapers/yahoo-finance");
|
|
6
|
+
// Re-export specific finance functions
|
|
7
|
+
var yahoo_finance_2 = require("./scrapers/yahoo-finance");
|
|
8
|
+
Object.defineProperty(exports, "getStockQuote", { enumerable: true, get: function () { return yahoo_finance_2.getStockQuote; } });
|
|
9
|
+
/**
 * Look up the quote for a ticker symbol.
 *
 * Yahoo Finance is the only provider wired in at the moment, so this simply
 * forwards to `getStockQuote`; a fallback chain would be added here if more
 * providers (e.g. Google Finance) become available.
 *
 * @param symbol The stock symbol (e.g., "AAPL", "GOOGL")
 * @returns Promise<FinanceResult>
 */
async function getQuote(symbol) {
    const quote = await (0, yahoo_finance_1.getStockQuote)(symbol);
    return quote;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Interop helper for default imports (looks like TypeScript's emitted `__importDefault`).
// Reuses an existing `this.__importDefault` when one is present; otherwise returns modules
// flagged `__esModule` unchanged and wraps any other value as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const vitest_1 = require("vitest");
|
|
7
|
+
const finance_1 = require("./finance");
|
|
8
|
+
const yahoo_finance_1 = require("./scrapers/yahoo-finance");
|
|
9
|
+
const yahoo_finance2_1 = __importDefault(require("yahoo-finance2"));
|
|
10
|
+
// Mock dependencies
|
|
11
|
+
vitest_1.vi.mock("yahoo-finance2", () => {
|
|
12
|
+
return {
|
|
13
|
+
default: {
|
|
14
|
+
quote: vitest_1.vi.fn(),
|
|
15
|
+
},
|
|
16
|
+
};
|
|
17
|
+
});
|
|
18
|
+
// Tests for the Yahoo Finance scraper and the getQuote orchestrator.
// `yahoo-finance2` is mocked, so `quote` below is a vitest mock function.
(0, vitest_1.describe)("Finance Module", () => {
    const quoteMock = yahoo_finance2_1.default.quote;
    (0, vitest_1.beforeEach)(() => {
        vitest_1.vi.resetAllMocks();
    });
    (0, vitest_1.describe)("Yahoo Finance Scraper", () => {
        (0, vitest_1.it)("should return formatted finance results", async () => {
            const marketTime = new Date("2023-01-01");
            const upstreamQuote = {
                symbol: "AAPL",
                shortName: "Apple Inc.",
                longName: "Apple Inc.",
                regularMarketPrice: 150.00,
                regularMarketChange: 2.50,
                regularMarketChangePercent: 1.69,
                regularMarketTime: marketTime,
                currency: "USD",
                exchange: "NMS",
                marketState: "REGULAR",
            };
            quoteMock.mockResolvedValue(upstreamQuote);
            const actual = await (0, yahoo_finance_1.getStockQuote)("AAPL");
            // The scraper should pass the fields through and tag the provider.
            (0, vitest_1.expect)(actual).toEqual({
                symbol: "AAPL",
                shortName: "Apple Inc.",
                longName: "Apple Inc.",
                regularMarketPrice: 150.00,
                regularMarketChange: 2.50,
                regularMarketChangePercent: 1.69,
                regularMarketTime: marketTime,
                currency: "USD",
                exchange: "NMS",
                marketState: "REGULAR",
                source: "yahoo-finance",
            });
        });
        (0, vitest_1.it)("should handle errors", async () => {
            quoteMock.mockRejectedValue(new Error("API Error"));
            const pending = (0, yahoo_finance_1.getStockQuote)("INVALID");
            await (0, vitest_1.expect)(pending).rejects.toMatchObject({
                code: "FINANCE_QUOTE_ERROR",
                message: vitest_1.expect.stringContaining("Failed to fetch quote"),
            });
        });
    });
    (0, vitest_1.describe)("getQuote (Orchestrator)", () => {
        (0, vitest_1.it)("should return data from Yahoo Finance", async () => {
            quoteMock.mockResolvedValue({
                symbol: "GOOGL",
                regularMarketPrice: 2800.00,
            });
            const actual = await (0, finance_1.getQuote)("GOOGL");
            (0, vitest_1.expect)(actual.symbol).toBe("GOOGL");
            (0, vitest_1.expect)(actual.source).toBe("yahoo-finance");
            (0, vitest_1.expect)(quoteMock).toHaveBeenCalledWith("GOOGL");
        });
        (0, vitest_1.it)("should propagate errors", async () => {
            quoteMock.mockRejectedValue(new Error("Fail"));
            const pending = (0, finance_1.getQuote)("ERROR");
            await (0, vitest_1.expect)(pending).rejects.toThrow();
        });
    });
});
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { FlightResult, FlightSearchOptions } from "../types";
|
|
2
|
+
/**
|
|
3
|
+
* Search for flights on Google Flights
|
|
4
|
+
* @param query Search query (e.g. "flights from JFK to LHR") or options object
|
|
5
|
+
* @param options Search options
|
|
6
|
+
* @returns Promise<FlightResult>
|
|
7
|
+
*/
|
|
8
|
+
export declare function searchFlights(query: string | FlightSearchOptions, options?: FlightSearchOptions): Promise<FlightResult>;
|