llm-search-tools 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/LICENSE +21 -0
  2. package/README.md +244 -0
  3. package/dist/index.d.ts +18 -0
  4. package/dist/index.js +40 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/integration.test.d.ts +1 -0
  7. package/dist/integration.test.js +237 -0
  8. package/dist/modules/answerbox.test.d.ts +1 -0
  9. package/dist/modules/answerbox.test.js +105 -0
  10. package/dist/modules/autocomplete.d.ts +11 -0
  11. package/dist/modules/autocomplete.js +159 -0
  12. package/dist/modules/autocomplete.test.d.ts +1 -0
  13. package/dist/modules/autocomplete.test.js +188 -0
  14. package/dist/modules/common.d.ts +26 -0
  15. package/dist/modules/common.js +263 -0
  16. package/dist/modules/common.test.d.ts +1 -0
  17. package/dist/modules/common.test.js +87 -0
  18. package/dist/modules/crawl.d.ts +9 -0
  19. package/dist/modules/crawl.js +117 -0
  20. package/dist/modules/crawl.test.d.ts +1 -0
  21. package/dist/modules/crawl.test.js +48 -0
  22. package/dist/modules/events.d.ts +8 -0
  23. package/dist/modules/events.js +129 -0
  24. package/dist/modules/events.test.d.ts +1 -0
  25. package/dist/modules/events.test.js +104 -0
  26. package/dist/modules/finance.d.ts +10 -0
  27. package/dist/modules/finance.js +20 -0
  28. package/dist/modules/finance.test.d.ts +1 -0
  29. package/dist/modules/finance.test.js +77 -0
  30. package/dist/modules/flights.d.ts +8 -0
  31. package/dist/modules/flights.js +135 -0
  32. package/dist/modules/flights.test.d.ts +1 -0
  33. package/dist/modules/flights.test.js +128 -0
  34. package/dist/modules/hackernews.d.ts +8 -0
  35. package/dist/modules/hackernews.js +87 -0
  36. package/dist/modules/hackernews.js.map +1 -0
  37. package/dist/modules/images.test.d.ts +1 -0
  38. package/dist/modules/images.test.js +145 -0
  39. package/dist/modules/integrations.test.d.ts +1 -0
  40. package/dist/modules/integrations.test.js +93 -0
  41. package/dist/modules/media.d.ts +11 -0
  42. package/dist/modules/media.js +132 -0
  43. package/dist/modules/media.test.d.ts +1 -0
  44. package/dist/modules/media.test.js +186 -0
  45. package/dist/modules/news.d.ts +3 -0
  46. package/dist/modules/news.js +39 -0
  47. package/dist/modules/news.test.d.ts +1 -0
  48. package/dist/modules/news.test.js +88 -0
  49. package/dist/modules/parser.d.ts +19 -0
  50. package/dist/modules/parser.js +361 -0
  51. package/dist/modules/parser.test.d.ts +1 -0
  52. package/dist/modules/parser.test.js +151 -0
  53. package/dist/modules/reddit.d.ts +21 -0
  54. package/dist/modules/reddit.js +107 -0
  55. package/dist/modules/scrape.d.ts +16 -0
  56. package/dist/modules/scrape.js +272 -0
  57. package/dist/modules/scrape.test.d.ts +1 -0
  58. package/dist/modules/scrape.test.js +232 -0
  59. package/dist/modules/scraper.d.ts +12 -0
  60. package/dist/modules/scraper.js +640 -0
  61. package/dist/modules/scrapers/anidb.d.ts +8 -0
  62. package/dist/modules/scrapers/anidb.js +156 -0
  63. package/dist/modules/scrapers/duckduckgo.d.ts +6 -0
  64. package/dist/modules/scrapers/duckduckgo.js +284 -0
  65. package/dist/modules/scrapers/google-news.d.ts +2 -0
  66. package/dist/modules/scrapers/google-news.js +60 -0
  67. package/dist/modules/scrapers/google.d.ts +6 -0
  68. package/dist/modules/scrapers/google.js +211 -0
  69. package/dist/modules/scrapers/searxng.d.ts +2 -0
  70. package/dist/modules/scrapers/searxng.js +93 -0
  71. package/dist/modules/scrapers/thetvdb.d.ts +3 -0
  72. package/dist/modules/scrapers/thetvdb.js +147 -0
  73. package/dist/modules/scrapers/tmdb.d.ts +3 -0
  74. package/dist/modules/scrapers/tmdb.js +172 -0
  75. package/dist/modules/scrapers/yahoo-finance.d.ts +2 -0
  76. package/dist/modules/scrapers/yahoo-finance.js +33 -0
  77. package/dist/modules/search.d.ts +5 -0
  78. package/dist/modules/search.js +45 -0
  79. package/dist/modules/search.js.map +1 -0
  80. package/dist/modules/search.test.d.ts +1 -0
  81. package/dist/modules/search.test.js +219 -0
  82. package/dist/modules/urbandictionary.d.ts +12 -0
  83. package/dist/modules/urbandictionary.js +26 -0
  84. package/dist/modules/webpage.d.ts +4 -0
  85. package/dist/modules/webpage.js +150 -0
  86. package/dist/modules/webpage.js.map +1 -0
  87. package/dist/modules/wikipedia.d.ts +5 -0
  88. package/dist/modules/wikipedia.js +85 -0
  89. package/dist/modules/wikipedia.js.map +1 -0
  90. package/dist/scripts/interactive-search.d.ts +1 -0
  91. package/dist/scripts/interactive-search.js +98 -0
  92. package/dist/test.d.ts +1 -0
  93. package/dist/test.js +179 -0
  94. package/dist/test.js.map +1 -0
  95. package/dist/testBraveSearch.d.ts +1 -0
  96. package/dist/testBraveSearch.js +34 -0
  97. package/dist/testDuckDuckGo.d.ts +1 -0
  98. package/dist/testDuckDuckGo.js +52 -0
  99. package/dist/testEcosia.d.ts +1 -0
  100. package/dist/testEcosia.js +57 -0
  101. package/dist/testSearchModule.d.ts +1 -0
  102. package/dist/testSearchModule.js +95 -0
  103. package/dist/testwebpage.d.ts +1 -0
  104. package/dist/testwebpage.js +81 -0
  105. package/dist/types.d.ts +174 -0
  106. package/dist/types.js +3 -0
  107. package/dist/types.js.map +1 -0
  108. package/dist/utils/createTestDocx.d.ts +1 -0
  109. package/dist/utils/createTestDocx.js +58 -0
  110. package/dist/utils/htmlcleaner.d.ts +20 -0
  111. package/dist/utils/htmlcleaner.js +172 -0
  112. package/docs/README.md +275 -0
  113. package/docs/autocomplete.md +73 -0
  114. package/docs/crawling.md +88 -0
  115. package/docs/events.md +58 -0
  116. package/docs/examples.md +158 -0
  117. package/docs/finance.md +60 -0
  118. package/docs/flights.md +71 -0
  119. package/docs/hackernews.md +121 -0
  120. package/docs/media.md +87 -0
  121. package/docs/news.md +75 -0
  122. package/docs/parser.md +197 -0
  123. package/docs/scraper.md +347 -0
  124. package/docs/search.md +106 -0
  125. package/docs/wikipedia.md +91 -0
  126. package/package.json +97 -0
@@ -0,0 +1,211 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.extractAnswerBox = extractAnswerBox;
4
+ exports.searchGoogle = searchGoogle;
5
+ const google_sr_1 = require("google-sr");
6
+ const common_1 = require("../common");
7
/**
 * Extracts the "Answer Box" or "Featured Snippet" text from the Google Search DOM.
 *
 * Selectors are probed in priority order and the first element with non-blank
 * text wins. An element whose text is only whitespace is skipped, so a
 * lower-priority selector can still supply the answer instead of an empty string.
 *
 * @param doc Object exposing `querySelector` (a Document or an element).
 * @returns The trimmed answer text, or `undefined` when no selector yields text.
 */
function extractAnswerBox(doc) {
    const answerSelectors = [
        ".hgKElc", // 1. Featured Snippet (Text)
        ".LGOjhe", // 2. Featured Snippet (List)
        ".Z0LcW", // 3. Direct Answer (e.g., calculations, dates)
        ".kno-rdesc span", // 4. Knowledge Panel Description
        "div[data-attrid='description']", // 5. Dictionary Definition
    ];
    for (const selector of answerSelectors) {
        const text = doc.querySelector(selector)?.textContent?.trim();
        if (text) {
            return text;
        }
    }
    return undefined;
}
38
// Rate limiting parameters
const GOOGLE_DELAY = 2000; // minimum spacing between Google searches, in ms
// Epoch ms at which the most recently admitted search is scheduled to start.
let lastGoogleSearchTime = 0;
// Cache for search results
const searchCache = new Map();
const CACHE_TTL = 60 * 60 * 1000; // 1 hour
/**
 * Delays the caller until at least GOOGLE_DELAY ms after the previously
 * admitted search.
 *
 * The slot is reserved synchronously, before any `await`, so callers that
 * arrive concurrently are queued GOOGLE_DELAY apart. Previously they all read
 * the same timestamp, slept the same amount and then fired together.
 *
 * @returns Promise that resolves when the caller may issue its request.
 */
async function enforceRateLimit() {
    const now = Date.now();
    const scheduledStart = Math.max(now, lastGoogleSearchTime + GOOGLE_DELAY);
    lastGoogleSearchTime = scheduledStart;
    const waitMs = scheduledStart - now;
    if (waitMs > 0) {
        await new Promise((resolve) => setTimeout(resolve, waitMs));
    }
}
53
/**
 * Loads Google Images for `query` in the given Puppeteer page and extracts
 * image results. The thumbnail doubles as `imageUrl` because the full-size URL
 * is not available without clicking through.
 *
 * @param page Puppeteer page that already has viewport and headers configured.
 * @param query Raw search text (URL-encoded here).
 * @param limit Maximum number of containers to inspect; falsy values mean 20.
 * @returns Array of `{ title, url, snippet, imageUrl, thumbnailUrl, source }`.
 */
async function scrapeGoogleImages(page, query, limit) {
    const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&tbm=isch`;
    await page.goto(searchUrl, { waitUntil: "networkidle2" });
    try {
        await page.waitForSelector("div[data-id], div.isv-r", { timeout: 10000 });
    }
    catch (e) {
        // Deliberate best effort: if the containers never appear, the extraction
        // below simply yields an empty list.
    }
    // The callback is serialized and run in the browser, so it must be self-contained.
    return page.evaluate((limit) => {
        const items = [];
        const elements = document.querySelectorAll("div[data-id], div.isv-r");
        for (let i = 0; i < Math.min(elements.length, limit || 20); i++) {
            const el = elements[i];
            // Title often in h3 or a title attribute
            const titleEl = el.querySelector("h3") || el.querySelector("[title]");
            const title = titleEl?.textContent || titleEl?.getAttribute("title") || "Image";
            // Source link (page containing the image)
            const linkEl = el.querySelector("a");
            const url = linkEl?.href || "";
            const imgEl = el.querySelector("img");
            const thumbnailUrl = imgEl?.src || imgEl?.getAttribute("data-src") || "";
            if (url && thumbnailUrl) {
                items.push({
                    title,
                    url, // This is the source page URL
                    snippet: title,
                    imageUrl: thumbnailUrl, // thumbnail used as the image URL
                    thumbnailUrl,
                    source: "google-images",
                });
            }
        }
        return items;
    }, limit);
}
/**
 * Loads the regular Google results page for `query` in the given Puppeteer page
 * and extracts organic results from `div.g` containers.
 *
 * @param page Puppeteer page that already has viewport and headers configured.
 * @param query Raw search text (URL-encoded here).
 * @param limit Maximum number of containers to inspect; falsy values mean 10.
 * @returns Array of `{ title, url, snippet, source }`.
 * @throws Whatever `waitForSelector` throws when no `div.g` appears within 10 s.
 */
async function scrapeGoogleWeb(page, query, limit) {
    const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}`;
    await page.goto(searchUrl, { waitUntil: "networkidle2" });
    await page.waitForSelector("div.g", { timeout: 10000 });
    // The callback is serialized and run in the browser, so it must be self-contained.
    return page.evaluate((limit) => {
        const items = [];
        const elements = document.querySelectorAll("div.g");
        for (let i = 0; i < Math.min(elements.length, limit || 10); i++) {
            const el = elements[i];
            const titleEl = el.querySelector("h3");
            const linkEl = el.querySelector("a");
            const snippetEl = el.querySelector(".VwiC3b");
            if (titleEl && linkEl) {
                items.push({
                    title: titleEl.textContent || "",
                    url: linkEl.href || "",
                    snippet: snippetEl?.textContent || "",
                    source: "google",
                });
            }
        }
        return items;
    }, limit);
}
/**
 * Searches Google through a Puppeteer browser obtained from
 * `common_1.createStealthBrowser`.
 *
 * The page is opened inside the `try` block so the browser is closed even when
 * `newPage()` itself fails; previously that failure leaked the browser process.
 *
 * @param query Raw search text.
 * @param options Reads `proxy`, `category` ("images" selects image search) and `limit`.
 * @returns Image results when `options.category === "images"`, otherwise web results.
 */
async function searchWithPuppeteer(query, options) {
    const proxy = (0, common_1.parseProxyConfig)(options.proxy);
    const browser = await (0, common_1.createStealthBrowser)(proxy || undefined);
    try {
        const page = await browser.newPage();
        // Set realistic viewport
        await page.setViewport({ width: 1920, height: 1080 });
        // Set extra headers
        await page.setExtraHTTPHeaders((0, common_1.createRealisticHeaders)());
        if (options.category === "images") {
            // `return await` keeps the browser open until extraction has finished.
            return await scrapeGoogleImages(page, query, options.limit);
        }
        return await scrapeGoogleWeb(page, query, options.limit);
    }
    finally {
        await browser.close();
    }
}
139
/**
 * Searches Google and returns normalized results, caching them for CACHE_TTL.
 *
 * Flow: cache lookup, then rate limit, then (unless `forcePuppeteer` is set or
 * images are requested) a probe request followed by the `google-sr` library,
 * and finally the Puppeteer scraper as fallback.
 *
 * Fixes over the previous version:
 * - `limit` is now applied to the google-sr results (it was ignored there);
 * - `safeSearch: false` is honoured (the request always sent `safe=active`).
 *
 * @param query Raw search text.
 * @param options Overrides for the defaults below. The merge is shallow, so a
 *   caller-supplied `antiBot` object replaces the default one wholesale.
 * @returns Array of `{ title, url, snippet, source }` (image results carry extra fields).
 * @throws A plain object `{ message, code: "GOOGLE_SEARCH_ERROR", originalError }`.
 */
async function searchGoogle(query, options = {}) {
    try {
        // Clone and merge options
        const mergedOptions = {
            limit: 10,
            safeSearch: true,
            timeout: 10000,
            forcePuppeteer: false,
            antiBot: {
                enabled: true,
                maxRetries: 3,
                retryDelay: 2000,
            },
            ...options,
        };
        const cacheKey = (0, common_1.getCacheKey)(query, mergedOptions);
        const cached = searchCache.get(cacheKey);
        if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
            return cached.results;
        }
        await enforceRateLimit();
        // Try basic fetch first unless Puppeteer is forced or we are searching for images
        if (!mergedOptions.forcePuppeteer && mergedOptions.category !== "images") {
            try {
                const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}`;
                // Probe request: only its failure matters (see the catch below).
                await (0, common_1.fetchWithDetection)(searchUrl, mergedOptions);
                // If no bot detection, use library
                const results = await (0, google_sr_1.search)({
                    query,
                    parsers: [google_sr_1.OrganicResult],
                    noPartialResults: true,
                    requestConfig: {
                        queryParams: { safe: mergedOptions.safeSearch ? "active" : "off" },
                    },
                });
                const formattedResults = results
                    .slice(0, mergedOptions.limit)
                    .map((r) => ({
                    title: r.title || "",
                    url: r.link || "",
                    snippet: r.description || "",
                    source: "google",
                }));
                searchCache.set(cacheKey, {
                    results: formattedResults,
                    timestamp: Date.now(),
                    source: "google",
                });
                return formattedResults;
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : String(error);
                const botBlocked = errorMessage === "Bot protection detected" && mergedOptions.antiBot?.enabled;
                if (!botBlocked) {
                    throw error;
                }
                // Bot protection with anti-bot handling enabled: fall through to Puppeteer.
            }
        }
        // Use Puppeteer as fallback
        const results = await searchWithPuppeteer(query, mergedOptions);
        searchCache.set(cacheKey, {
            results,
            timestamp: Date.now(),
            source: "google",
        });
        return results;
    }
    catch (error) {
        throw {
            message: "google search failed :(",
            code: "GOOGLE_SEARCH_ERROR",
            originalError: error,
        };
    }
}
@@ -0,0 +1,2 @@
1
+ import { ScraperOptions, SearchResult } from "../../types";
2
+ export declare function searchSearxNG(query: string, options?: ScraperOptions): Promise<SearchResult[]>;
@@ -0,0 +1,93 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.searchSearxNG = searchSearxNG;
4
+ const common_1 = require("../common");
5
// Default public SearxNG instances; one is picked at random when the caller
// does not supply `searxngInstance`.
// NOTE(review): https://searx.space was removed from this list. It is the
// public instance directory rather than a search endpoint, so picking it made
// the request fail — confirm before re-adding.
const DEFAULT_INSTANCES = [
    "https://searx.be",
    "https://search.mdosch.de",
    "https://searx.work",
    "https://searx.fmac.xyz",
    "https://northboot.xyz",
];
// Cache for search results
const searchCache = new Map();
const CACHE_TTL = 60 * 60 * 1000; // 1 hour
17
/**
 * Queries a SearxNG instance through its JSON endpoint and returns normalized
 * results, caching them for CACHE_TTL.
 *
 * Fixes over the previous version:
 * - image results are filtered before the limit is applied, so non-image
 *   entries no longer eat into the requested count;
 * - an entry without a `url` no longer throws inside the image filter;
 * - trailing slashes on the instance URL no longer produce `//search`.
 *
 * @param query Raw search text.
 * @param options Reads `limit`, `safeSearch`, `category` and `searxngInstance`;
 *   the merged options are also passed to `common_1.fetchWithDetection`.
 * @returns Array of `{ title, url, snippet, source }`; image results also carry
 *   `imageUrl` and `thumbnailUrl`.
 * @throws A plain object `{ message, code: "SEARXNG_SEARCH_ERROR", originalError }`.
 */
async function searchSearxNG(query, options = {}) {
    try {
        const mergedOptions = {
            limit: 10,
            safeSearch: true,
            timeout: 10000,
            ...options,
        };
        const cacheKey = (0, common_1.getCacheKey)(query, mergedOptions);
        const cached = searchCache.get(cacheKey);
        if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
            return cached.results;
        }
        // Use provided instance or pick a random default
        const instance = mergedOptions.searxngInstance || DEFAULT_INSTANCES[Math.floor(Math.random() * DEFAULT_INSTANCES.length)];
        const wantsImages = mergedOptions.category === "images";
        // Construct URL with JSON format; strip trailing slashes to avoid "//search".
        const searchUrl = new URL(`${String(instance).replace(/\/+$/, "")}/search`);
        searchUrl.searchParams.append("q", query);
        searchUrl.searchParams.append("format", "json");
        searchUrl.searchParams.append("safesearch", mergedOptions.safeSearch ? "1" : "0");
        if (wantsImages) {
            searchUrl.searchParams.append("categories", "images");
        }
        try {
            const { body } = await (0, common_1.fetchWithDetection)(searchUrl.toString(), mergedOptions);
            const data = JSON.parse(body);
            if (!data.results || !Array.isArray(data.results)) {
                throw new Error("Invalid response format from SearxNG");
            }
            // An entry counts as an image if it has an image field or its URL ends in an image extension.
            const looksLikeImage = (r) => Boolean(r &&
                (r.img_src ||
                    r.thumbnail_src ||
                    (typeof r.url === "string" && /\.(jpeg|jpg|gif|png)$/i.test(r.url))));
            const results = wantsImages
                ? data.results
                    .filter(looksLikeImage)
                    .slice(0, mergedOptions.limit)
                    .map((r) => ({
                    title: r.title,
                    url: r.url,
                    snippet: r.content || r.title,
                    imageUrl: r.img_src || r.url,
                    thumbnailUrl: r.thumbnail_src || r.thumbnail || r.img_src || r.url,
                    source: "searxng-images",
                }))
                : data.results.slice(0, mergedOptions.limit).map((r) => ({
                    title: r.title,
                    url: r.url,
                    snippet: r.content || "",
                    source: "searxng",
                }));
            searchCache.set(cacheKey, {
                results,
                timestamp: Date.now(),
                source: "searxng",
            });
            return results;
        }
        catch (error) {
            // Bot-protection errors keep their original message; everything else
            // is wrapped with the instance that failed.
            if (error instanceof Error && error.message.includes("Bot protection")) {
                throw error;
            }
            throw new Error(`Failed to fetch from SearxNG instance ${instance}: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    catch (error) {
        throw {
            message: "searxng search failed",
            code: "SEARXNG_SEARCH_ERROR",
            originalError: error,
        };
    }
}
@@ -0,0 +1,3 @@
1
+ import { MediaResult, MediaSearchOptions } from "../../types";
2
+ export declare function searchTheTVDB(query: string, options?: MediaSearchOptions): Promise<MediaResult[]>;
3
+ export declare function getTheTVDBDetails(url: string, options?: MediaSearchOptions): Promise<Partial<MediaResult>>;
@@ -0,0 +1,147 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.searchTheTVDB = searchTheTVDB;
4
+ exports.getTheTVDBDetails = getTheTVDBDetails;
5
+ const common_1 = require("../common");
6
+ const jsdom_1 = require("jsdom");
7
// Rate limiting
const MIN_DELAY = 1500; // minimum spacing between TheTVDB searches, in ms
// Epoch ms at which the most recently admitted search is scheduled to start.
let lastSearchTime = 0;
/**
 * Delays the caller until at least MIN_DELAY ms after the previously admitted
 * search.
 *
 * The slot is reserved synchronously, before any `await`, so callers that
 * arrive concurrently are queued MIN_DELAY apart. Previously they all read the
 * same timestamp, slept the same amount and then fired together.
 *
 * @returns Promise that resolves when the caller may issue its request.
 */
async function enforceRateLimit() {
    const now = Date.now();
    const scheduledStart = Math.max(now, lastSearchTime + MIN_DELAY);
    lastSearchTime = scheduledStart;
    const waitMs = scheduledStart - now;
    if (waitMs > 0) {
        await new Promise((resolve) => setTimeout(resolve, waitMs));
    }
}
18
/**
 * Searches TheTVDB for `query`.
 *
 * Unless `options.forcePuppeteer` is set, a plain fetch-based scrape is tried
 * first; if it throws, the Puppeteer scraper is used instead. The old catch
 * block's `if (forcePuppeteer) throw e` could never run, because the fetch
 * path is skipped entirely when `forcePuppeteer` is set; it has been removed
 * without changing behaviour.
 *
 * @param query Raw search text (URL-encoded here).
 * @param options Passed through to both scrapers; `forcePuppeteer` skips the fetch attempt.
 * @returns Results from whichever scraper succeeded.
 * @throws A plain object `{ message, code: "THETVDB_SEARCH_ERROR", originalError }`.
 */
async function searchTheTVDB(query, options = {}) {
    try {
        await enforceRateLimit();
        const mergedOptions = { ...options };
        const searchUrl = `https://thetvdb.com/search?query=${encodeURIComponent(query)}`;
        if (!mergedOptions.forcePuppeteer) {
            try {
                return await scrapeTheTVDBWithFetch(searchUrl, mergedOptions);
            }
            catch (e) {
                // Deliberate: any fetch-path failure falls through to Puppeteer.
            }
        }
        // Fallback to Puppeteer
        return await scrapeTheTVDBWithPuppeteer(searchUrl, mergedOptions);
    }
    catch (error) {
        throw {
            message: "TheTVDB search failed",
            code: "THETVDB_SEARCH_ERROR",
            originalError: error,
        };
    }
}
44
/**
 * Downloads a TheTVDB search page with `common_1.fetchWithDetection`, parses
 * the returned body with JSDOM and extracts results from the document.
 *
 * @param url Fully built TheTVDB search URL.
 * @param options Passed unchanged to `fetchWithDetection`.
 * @returns Whatever `parseTheTVDBResults` extracts from the page.
 */
async function scrapeTheTVDBWithFetch(url, options) {
    const response = await (0, common_1.fetchWithDetection)(url, options);
    const pageDocument = new jsdom_1.JSDOM(response.body).window.document;
    return parseTheTVDBResults(pageDocument);
}
49
/**
 * Renders a TheTVDB search page in a Puppeteer browser and extracts results
 * from the rendered HTML.
 *
 * @param url Fully built TheTVDB search URL.
 * @param options Only `proxy` is read here.
 * @returns Parsed results, or an empty array when no result list appears within 5 s.
 */
async function scrapeTheTVDBWithPuppeteer(url, options) {
    const proxyConfig = (0, common_1.parseProxyConfig)(options.proxy);
    const browser = await (0, common_1.createStealthBrowser)(proxyConfig || undefined);
    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1920, height: 1080 });
        await page.setExtraHTTPHeaders((0, common_1.createRealisticHeaders)());
        await page.goto(url, { waitUntil: "networkidle2" });
        // Wait for the results list; a failure here is treated as "no results".
        let listAppeared = true;
        try {
            await page.waitForSelector(".list-group, .media-list", { timeout: 5000 });
        }
        catch (e) {
            listAppeared = false;
        }
        if (!listAppeared) {
            return [];
        }
        const renderedHtml = await page.content();
        const renderedDocument = new jsdom_1.JSDOM(renderedHtml).window.document;
        return parseTheTVDBResults(renderedDocument);
    }
    finally {
        // Runs on every exit path, including the early empty return.
        await browser.close();
    }
}
72
/**
 * Extracts up to 10 media results from a TheTVDB search results document.
 *
 * Fixes over the previous version:
 * - relative links are resolved against the site origin with `URL`, so an href
 *   such as `series/foo` no longer becomes `https://thetvdb.comseries/foo`;
 * - relative poster URLs are resolved the same way;
 * - badge text is trimmed before being compared with "movie" / "series";
 * - iteration stops once 10 results have been collected.
 *
 * @param doc Document (or element) exposing `querySelectorAll`.
 * @returns Array of `{ title, url, description, releaseDate, posterUrl, source, mediaType }`.
 */
function parseTheTVDBResults(doc) {
    const siteOrigin = "https://thetvdb.com";
    const maxResults = 10;
    // Leaves empty and absolute http(s) values untouched, resolves everything else.
    const toAbsolute = (value) => {
        if (!value || value.startsWith("http")) {
            return value;
        }
        try {
            return new URL(value, siteOrigin).href;
        }
        catch (e) {
            return `${siteOrigin}${value}`;
        }
    };
    const results = [];
    // TheTVDB search results usually look like list items
    for (const item of Array.from(doc.querySelectorAll(".list-group-item, li.media"))) {
        if (results.length >= maxResults) {
            break;
        }
        const titleEl = item.querySelector("h4, .media-heading");
        const linkEl = item.querySelector("a");
        if (!titleEl || !linkEl) {
            continue;
        }
        const imgEl = item.querySelector("img");
        const overviewEl = item.querySelector("p, .overview");
        const smallText = item.querySelector("small"); // Often contains date or network
        const href = toAbsolute(linkEl.getAttribute("href") || "");
        let posterUrl = imgEl?.getAttribute("src") || undefined;
        // Filter out placeholder images if possible
        if (posterUrl?.includes("missing")) {
            posterUrl = undefined;
        }
        else if (posterUrl) {
            posterUrl = toAbsolute(posterUrl);
        }
        // Detect type: default is "tv", the URL may say movie, a badge has the last word.
        let mediaType = href.includes("/movies/") ? "movie" : "tv";
        item.querySelectorAll(".badge").forEach((badge) => {
            const label = badge.textContent?.trim().toLowerCase();
            if (label === "movie") {
                mediaType = "movie";
            }
            if (label === "series") {
                mediaType = "tv";
            }
        });
        results.push({
            title: titleEl.textContent?.trim() || "",
            url: href,
            description: overviewEl?.textContent?.trim(),
            releaseDate: smallText?.textContent?.trim(),
            posterUrl,
            source: "thetvdb",
            mediaType,
        });
    }
    return results;
}
122
/**
 * Fetches a TheTVDB detail page and collects the text of every link whose href
 * contains "/genres/".
 *
 * This is best effort: any failure (fetch or parsing) yields an empty object
 * instead of an error. No rating is extracted.
 *
 * @param url Detail page URL.
 * @param options Passed unchanged to `common_1.fetchWithDetection`.
 * @returns `{ genres }`, where `genres` is `undefined` when none were found; `{}` on failure.
 */
async function getTheTVDBDetails(url, options = {}) {
    try {
        const { body } = await (0, common_1.fetchWithDetection)(url, options);
        const detailDocument = new jsdom_1.JSDOM(body).window.document;
        // Genre links typically sit in a sidebar or info block.
        const genres = Array.from(detailDocument.querySelectorAll("a[href*='/genres/']"))
            .filter((link) => link.textContent)
            .map((link) => link.textContent.trim());
        return {
            genres: genres.length > 0 ? genres : undefined,
        };
    }
    catch (e) {
        return {};
    }
}
@@ -0,0 +1,3 @@
1
+ import { MediaResult, MediaSearchOptions } from "../../types";
2
+ export declare function searchTMDB(query: string, options?: MediaSearchOptions): Promise<MediaResult[]>;
3
+ export declare function getTMDBDetails(url: string, options?: MediaSearchOptions): Promise<Partial<MediaResult>>;
@@ -0,0 +1,172 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.searchTMDB = searchTMDB;
4
+ exports.getTMDBDetails = getTMDBDetails;
5
+ const common_1 = require("../common");
6
+ const jsdom_1 = require("jsdom");
7
// Rate limiting
const MIN_DELAY = 1000; // minimum spacing between TMDB searches, in ms
// Epoch ms at which the most recently admitted search is scheduled to start.
let lastSearchTime = 0;
/**
 * Delays the caller until at least MIN_DELAY ms after the previously admitted
 * search.
 *
 * The slot is reserved synchronously, before any `await`, so callers that
 * arrive concurrently are queued MIN_DELAY apart. Previously they all read the
 * same timestamp, slept the same amount and then fired together.
 *
 * @returns Promise that resolves when the caller may issue its request.
 */
async function enforceRateLimit() {
    const now = Date.now();
    const scheduledStart = Math.max(now, lastSearchTime + MIN_DELAY);
    lastSearchTime = scheduledStart;
    const waitMs = scheduledStart - now;
    if (waitMs > 0) {
        await new Promise((resolve) => setTimeout(resolve, waitMs));
    }
}
18
/**
 * Searches TMDB for `query`.
 *
 * `options.type` of "movie" or "tv" selects the dedicated search page; any
 * other value uses the combined search. Unless `options.forcePuppeteer` is
 * set, a plain fetch-based scrape is tried first; if it throws, the Puppeteer
 * scraper is used instead. The old catch block's `if (forcePuppeteer) throw e`
 * could never run, because the fetch path is skipped entirely when
 * `forcePuppeteer` is set; it has been removed without changing behaviour.
 *
 * @param query Raw search text (URL-encoded here).
 * @param options Passed through to both scrapers; reads `type` and `forcePuppeteer`.
 * @returns Results from whichever scraper succeeded.
 * @throws A plain object `{ message, code: "TMDB_SEARCH_ERROR", originalError }`.
 */
async function searchTMDB(query, options = {}) {
    try {
        await enforceRateLimit();
        const mergedOptions = { ...options };
        // Determine search type
        const typeSegment = mergedOptions.type === "movie" || mergedOptions.type === "tv"
            ? `/${mergedOptions.type}`
            : "";
        const searchUrl = `https://www.themoviedb.org/search${typeSegment}?query=${encodeURIComponent(query)}`;
        if (!mergedOptions.forcePuppeteer) {
            try {
                return await scrapeTMDBWithFetch(searchUrl, mergedOptions);
            }
            catch (e) {
                // Deliberate: any fetch-path failure falls through to Puppeteer.
            }
        }
        // Fallback to Puppeteer
        return await scrapeTMDBWithPuppeteer(searchUrl, mergedOptions);
    }
    catch (error) {
        throw {
            message: "TMDB search failed",
            code: "TMDB_SEARCH_ERROR",
            originalError: error,
        };
    }
}
52
/**
 * Downloads a TMDB search page with `common_1.fetchWithDetection`, parses the
 * returned body with JSDOM and extracts results from the document.
 *
 * @param url Fully built TMDB search URL.
 * @param options Passed unchanged to `fetchWithDetection`.
 * @returns Whatever `parseTMDBResults` extracts from the page.
 */
async function scrapeTMDBWithFetch(url, options) {
    const response = await (0, common_1.fetchWithDetection)(url, options);
    const pageDocument = new jsdom_1.JSDOM(response.body).window.document;
    return parseTMDBResults(pageDocument);
}
58
/**
 * Renders a TMDB search page in a Puppeteer browser and extracts results from
 * the rendered HTML.
 *
 * @param url Fully built TMDB search URL.
 * @param options Only `proxy` is read here.
 * @returns Parsed results, or an empty array when no `.card` appears within 5 s.
 */
async function scrapeTMDBWithPuppeteer(url, options) {
    const proxyConfig = (0, common_1.parseProxyConfig)(options.proxy);
    const browser = await (0, common_1.createStealthBrowser)(proxyConfig || undefined);
    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1920, height: 1080 });
        await page.setExtraHTTPHeaders((0, common_1.createRealisticHeaders)());
        await page.goto(url, { waitUntil: "networkidle2" });
        // Wait for results; a failure here is treated as "no results".
        let cardsAppeared = true;
        try {
            await page.waitForSelector(".card", { timeout: 5000 });
        }
        catch (e) {
            cardsAppeared = false;
        }
        if (!cardsAppeared) {
            return [];
        }
        const renderedHtml = await page.content();
        const renderedDocument = new jsdom_1.JSDOM(renderedHtml).window.document;
        return parseTMDBResults(renderedDocument);
    }
    finally {
        // Runs on every exit path, including the early empty return.
        await browser.close();
    }
}
82
/**
 * Extracts up to 10 media results from the `.card` elements of a TMDB search
 * results document.
 *
 * A card is used only if it has both an `h2` title and an `a.result` link.
 * Link and poster values that do not start with "http" are prefixed with the
 * TMDB origin. `mediaType` is "tv" when the URL contains "/tv/", else "movie".
 *
 * @param doc Document (or element) exposing `querySelectorAll`.
 * @returns Array of `{ title, url, description, releaseDate, posterUrl, source, mediaType }`.
 */
function parseTMDBResults(doc) {
    const tmdbOrigin = "https://www.themoviedb.org";
    const withOrigin = (value) => (value.startsWith("http") ? value : `${tmdbOrigin}${value}`);
    const collected = [];
    for (const card of Array.from(doc.querySelectorAll(".card"))) {
        if (collected.length >= 10) {
            break;
        }
        const heading = card.querySelector("h2");
        const anchor = card.querySelector("a.result"); // Usually the image link or title link
        const dateNode = card.querySelector(".date");
        const overviewNode = card.querySelector(".overview");
        const image = card.querySelector("img");
        if (!heading || !anchor) {
            continue;
        }
        const url = withOrigin(anchor.href);
        const rawPoster = image?.src || image?.getAttribute("data-src") || undefined;
        const posterUrl = rawPoster ? withOrigin(rawPoster) : rawPoster;
        collected.push({
            title: heading.textContent?.trim() || "",
            url,
            description: overviewNode?.textContent?.trim(),
            releaseDate: dateNode?.textContent?.trim(),
            posterUrl,
            source: "tmdb",
            // Determine type from URL; "movie" is the default.
            mediaType: url.includes("/tv/") ? "tv" : "movie",
        });
    }
    return collected;
}
122
/**
 * Fetches a TMDB detail page and scrapes rating, genres, cast and visible
 * watch providers from it. Intended for follow-up lookups on a single search
 * result.
 *
 * This is best effort: any failure (fetch or parsing) yields an empty object
 * instead of an error.
 *
 * @param url Detail page URL.
 * @param options Passed unchanged to `common_1.fetchWithDetection`.
 * @returns `{ rating, genres, cast, watchProviders }`. `rating` is the
 *   `data-percent` value with "%" appended, or `undefined`. `watchProviders`
 *   is `undefined` when none were found. Returns `{}` on failure.
 */
async function getTMDBDetails(url, options = {}) {
    try {
        const { body } = await (0, common_1.fetchWithDetection)(url, options);
        const detailDocument = new jsdom_1.JSDOM(body).window.document;
        // Trimmed text of every matching element that has any text content.
        const textsOf = (selector) => {
            const texts = [];
            detailDocument.querySelectorAll(selector).forEach((el) => {
                if (el.textContent) {
                    texts.push(el.textContent.trim());
                }
            });
            return texts;
        };
        // Rating
        const scoreChart = detailDocument.querySelector(".user_score_chart");
        const percent = scoreChart?.getAttribute("data-percent");
        const rating = percent ? `${percent}%` : undefined;
        // Genres
        const genres = textsOf(".genres a");
        // Cast
        const cast = textsOf(".people.scroller li.card p a");
        // Watch providers: only those whose logo is present in the static HTML
        // can be seen here. The type is not derivable from the logo, so every
        // provider is reported as "stream".
        const watchProviders = [];
        detailDocument.querySelectorAll(".provider").forEach((section) => {
            const logo = section.querySelector("img");
            const name = logo ? logo.getAttribute("alt") || "" : "";
            if (name) {
                watchProviders.push({ name, type: "stream" });
            }
        });
        return {
            rating,
            genres,
            cast,
            watchProviders: watchProviders.length > 0 ? watchProviders : undefined,
        };
    }
    catch (e) {
        return {};
    }
}
@@ -0,0 +1,2 @@
1
+ import { FinanceResult } from "../../types";
2
+ export declare function getStockQuote(symbol: string): Promise<FinanceResult>;
@@ -0,0 +1,33 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getStockQuote = getStockQuote;
7
+ const yahoo_finance2_1 = __importDefault(require("yahoo-finance2"));
8
/**
 * Looks up a quote for `symbol` through the `yahoo-finance2` default export
 * and returns a fixed subset of its fields, tagged with the source.
 *
 * @param symbol Ticker symbol passed to `quote()`.
 * @returns Object with `symbol`, `shortName`, `longName`, `regularMarketPrice`,
 *   `regularMarketChange`, `regularMarketChangePercent`, `regularMarketTime`,
 *   `currency`, `exchange`, `marketState` copied from the quote, plus
 *   `source: "yahoo-finance"`.
 * @throws A plain object `{ message, code: "FINANCE_QUOTE_ERROR", originalError }`
 *   when the lookup or the field access fails.
 */
async function getStockQuote(symbol) {
    const copiedFields = [
        "symbol",
        "shortName",
        "longName",
        "regularMarketPrice",
        "regularMarketChange",
        "regularMarketChangePercent",
        "regularMarketTime",
        "currency",
        "exchange",
        "marketState",
    ];
    try {
        const quote = await yahoo_finance2_1.default.quote(symbol);
        const result = {};
        for (const field of copiedFields) {
            // A missing quote (undefined) throws here and is reported below.
            result[field] = quote[field];
        }
        result.source = "yahoo-finance";
        return result;
    }
    catch (error) {
        throw {
            message: `Failed to fetch quote for symbol: ${symbol}`,
            code: "FINANCE_QUOTE_ERROR",
            originalError: error,
        };
    }
}
@@ -0,0 +1,5 @@
1
+ import { ScraperOptions, SearchResult } from "../types";
2
+ export { searchGoogle } from "./scrapers/google";
3
+ export { searchDuckDuckGo } from "./scrapers/duckduckgo";
4
+ export { searchSearxNG } from "./scrapers/searxng";
5
+ export declare function search(query: string, options?: ScraperOptions): Promise<SearchResult[]>;