llm-search-tools 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +244 -0
  3. package/dist/index.d.ts +18 -0
  4. package/dist/index.js +40 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/integration.test.d.ts +1 -0
  7. package/dist/integration.test.js +237 -0
  8. package/dist/modules/answerbox.test.d.ts +1 -0
  9. package/dist/modules/answerbox.test.js +105 -0
  10. package/dist/modules/autocomplete.d.ts +11 -0
  11. package/dist/modules/autocomplete.js +159 -0
  12. package/dist/modules/autocomplete.test.d.ts +1 -0
  13. package/dist/modules/autocomplete.test.js +188 -0
  14. package/dist/modules/common.d.ts +26 -0
  15. package/dist/modules/common.js +263 -0
  16. package/dist/modules/common.test.d.ts +1 -0
  17. package/dist/modules/common.test.js +87 -0
  18. package/dist/modules/crawl.d.ts +9 -0
  19. package/dist/modules/crawl.js +117 -0
  20. package/dist/modules/crawl.test.d.ts +1 -0
  21. package/dist/modules/crawl.test.js +48 -0
  22. package/dist/modules/events.d.ts +8 -0
  23. package/dist/modules/events.js +129 -0
  24. package/dist/modules/events.test.d.ts +1 -0
  25. package/dist/modules/events.test.js +104 -0
  26. package/dist/modules/finance.d.ts +10 -0
  27. package/dist/modules/finance.js +20 -0
  28. package/dist/modules/finance.test.d.ts +1 -0
  29. package/dist/modules/finance.test.js +77 -0
  30. package/dist/modules/flights.d.ts +8 -0
  31. package/dist/modules/flights.js +135 -0
  32. package/dist/modules/flights.test.d.ts +1 -0
  33. package/dist/modules/flights.test.js +128 -0
  34. package/dist/modules/hackernews.d.ts +8 -0
  35. package/dist/modules/hackernews.js +87 -0
  36. package/dist/modules/hackernews.js.map +1 -0
  37. package/dist/modules/images.test.d.ts +1 -0
  38. package/dist/modules/images.test.js +145 -0
  39. package/dist/modules/integrations.test.d.ts +1 -0
  40. package/dist/modules/integrations.test.js +93 -0
  41. package/dist/modules/media.d.ts +11 -0
  42. package/dist/modules/media.js +132 -0
  43. package/dist/modules/media.test.d.ts +1 -0
  44. package/dist/modules/media.test.js +186 -0
  45. package/dist/modules/news.d.ts +3 -0
  46. package/dist/modules/news.js +39 -0
  47. package/dist/modules/news.test.d.ts +1 -0
  48. package/dist/modules/news.test.js +88 -0
  49. package/dist/modules/parser.d.ts +19 -0
  50. package/dist/modules/parser.js +361 -0
  51. package/dist/modules/parser.test.d.ts +1 -0
  52. package/dist/modules/parser.test.js +151 -0
  53. package/dist/modules/reddit.d.ts +21 -0
  54. package/dist/modules/reddit.js +107 -0
  55. package/dist/modules/scrape.d.ts +16 -0
  56. package/dist/modules/scrape.js +272 -0
  57. package/dist/modules/scrape.test.d.ts +1 -0
  58. package/dist/modules/scrape.test.js +232 -0
  59. package/dist/modules/scraper.d.ts +12 -0
  60. package/dist/modules/scraper.js +640 -0
  61. package/dist/modules/scrapers/anidb.d.ts +8 -0
  62. package/dist/modules/scrapers/anidb.js +156 -0
  63. package/dist/modules/scrapers/duckduckgo.d.ts +6 -0
  64. package/dist/modules/scrapers/duckduckgo.js +284 -0
  65. package/dist/modules/scrapers/google-news.d.ts +2 -0
  66. package/dist/modules/scrapers/google-news.js +60 -0
  67. package/dist/modules/scrapers/google.d.ts +6 -0
  68. package/dist/modules/scrapers/google.js +211 -0
  69. package/dist/modules/scrapers/searxng.d.ts +2 -0
  70. package/dist/modules/scrapers/searxng.js +93 -0
  71. package/dist/modules/scrapers/thetvdb.d.ts +3 -0
  72. package/dist/modules/scrapers/thetvdb.js +147 -0
  73. package/dist/modules/scrapers/tmdb.d.ts +3 -0
  74. package/dist/modules/scrapers/tmdb.js +172 -0
  75. package/dist/modules/scrapers/yahoo-finance.d.ts +2 -0
  76. package/dist/modules/scrapers/yahoo-finance.js +33 -0
  77. package/dist/modules/search.d.ts +5 -0
  78. package/dist/modules/search.js +45 -0
  79. package/dist/modules/search.js.map +1 -0
  80. package/dist/modules/search.test.d.ts +1 -0
  81. package/dist/modules/search.test.js +219 -0
  82. package/dist/modules/urbandictionary.d.ts +12 -0
  83. package/dist/modules/urbandictionary.js +26 -0
  84. package/dist/modules/webpage.d.ts +4 -0
  85. package/dist/modules/webpage.js +150 -0
  86. package/dist/modules/webpage.js.map +1 -0
  87. package/dist/modules/wikipedia.d.ts +5 -0
  88. package/dist/modules/wikipedia.js +85 -0
  89. package/dist/modules/wikipedia.js.map +1 -0
  90. package/dist/scripts/interactive-search.d.ts +1 -0
  91. package/dist/scripts/interactive-search.js +98 -0
  92. package/dist/test.d.ts +1 -0
  93. package/dist/test.js +179 -0
  94. package/dist/test.js.map +1 -0
  95. package/dist/testBraveSearch.d.ts +1 -0
  96. package/dist/testBraveSearch.js +34 -0
  97. package/dist/testDuckDuckGo.d.ts +1 -0
  98. package/dist/testDuckDuckGo.js +52 -0
  99. package/dist/testEcosia.d.ts +1 -0
  100. package/dist/testEcosia.js +57 -0
  101. package/dist/testSearchModule.d.ts +1 -0
  102. package/dist/testSearchModule.js +95 -0
  103. package/dist/testwebpage.d.ts +1 -0
  104. package/dist/testwebpage.js +81 -0
  105. package/dist/types.d.ts +174 -0
  106. package/dist/types.js +3 -0
  107. package/dist/types.js.map +1 -0
  108. package/dist/utils/createTestDocx.d.ts +1 -0
  109. package/dist/utils/createTestDocx.js +58 -0
  110. package/dist/utils/htmlcleaner.d.ts +20 -0
  111. package/dist/utils/htmlcleaner.js +172 -0
  112. package/docs/README.md +275 -0
  113. package/docs/autocomplete.md +73 -0
  114. package/docs/crawling.md +88 -0
  115. package/docs/events.md +58 -0
  116. package/docs/examples.md +158 -0
  117. package/docs/finance.md +60 -0
  118. package/docs/flights.md +71 -0
  119. package/docs/hackernews.md +121 -0
  120. package/docs/media.md +87 -0
  121. package/docs/news.md +75 -0
  122. package/docs/parser.md +197 -0
  123. package/docs/scraper.md +347 -0
  124. package/docs/search.md +106 -0
  125. package/docs/wikipedia.md +91 -0
  126. package/package.json +97 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 llm-search-tools
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,244 @@
1
+ # llm-search-tools 🔍
2
+
3
+ [![npm version](https://badge.fury.io/js/llm-search-tools.svg)](https://badge.fury.io/js/llm-search-tools)
4
+
5
+ > A Node.js module for searching and scraping web content, designed for LLMs but useful for everyone!
6
+
7
+ ## Features
8
+
9
+ - Search multiple engines (Google, DuckDuckGo, SearxNG)
10
+ - **Autocomplete/Suggestions** (Google, DuckDuckGo, Brave, etc.)
11
+ - **Website Crawling** (Multi-page extraction with Cheerio/Puppeteer)
12
+ - **Flight Search** (Google Flights)
13
+ - **Event Search** (Google Events)
14
+ - News search (Google News, DuckDuckGo)
15
+ - Finance data (Yahoo Finance)
16
+ - Wikipedia search and content extraction
17
+ - Media search (Movies, TV, Anime) via TMDB, AniDB, TheTVDB
18
+ - HackerNews scraping
19
+ - Webpage content extraction
20
+ - Document parsing (PDF, DOCX, CSV)
21
+ - Image OCR/text extraction support
22
+ - **No API keys required at all**
23
+ - Automatic fallbacks
24
+ - TypeScript & Node support
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ npm install llm-search-tools
30
+
31
+ # Optional: Install OCR language data for non-English languages
32
+ npm install tesseract.js-data
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ ```typescript
38
+ import {
39
+ search,
40
+ parse,
41
+ searchNews,
42
+ searchMedia,
43
+ getQuote,
44
+ getSuggestions,
45
+ crawl,
46
+ } from "llm-search-tools";
47
+
48
+ // Web Search
49
+ const results = await search("typescript tutorial");
50
+ console.log(results);
51
+
52
+ // Autocomplete
53
+ const suggestions = await getSuggestions("react");
54
+ console.log(suggestions.suggestions);
55
+
56
+ // Crawl Website
57
+ const pages = await crawl("https://example.com", { maxDepth: 1 });
58
+ console.log(pages[0].textContent);
59
+
60
+ // News Search
61
+ const news = await searchNews("technology trends");
62
+ console.log(news);
63
+
64
+ // Media Search
65
+ const movies = await searchMedia("Inception", { type: "movie" });
66
+ console.log(movies);
67
+
68
+ // Finance Data
69
+ const quote = await getQuote("AAPL");
70
+ console.log(quote);
71
+
72
+ // Parse Documents
73
+ const pdfResult = await parse("document.pdf");
74
+ console.log(pdfResult.text);
75
+
76
+ const csvResult = await parse("data.csv", {
77
+ csv: { columns: true },
78
+ });
79
+ console.log(csvResult.data);
80
+
81
+ // OCR Images
82
+ const imageResult = await parse("image.png", {
83
+ language: "eng",
84
+ });
85
+ console.log(imageResult.text);
86
+ ```
87
+
88
+ ## Supported File Types
89
+
90
+ ### Documents
91
+
92
+ - PDF files (`.pdf`)
93
+ - Word documents (`.docx`)
94
+ - CSV files (`.csv`)
95
+ - XML files (`.xml`)
96
+ - JSON files (`.json`)
97
+ - Text files (`.txt`, `.md`, etc.)
98
+
99
+ ### Images (OCR)
100
+
101
+ - PNG (`.png`)
102
+ - JPEG (`.jpg`, `.jpeg`)
103
+ - BMP (`.bmp`)
104
+ - GIF (`.gif`)
105
+
106
+ ## Documentation
107
+
108
+ See the [docs](./docs) directory for detailed documentation:
109
+
110
+ - [Search](./docs/search.md) - Web search capabilities
111
+ - [Autocomplete](./docs/autocomplete.md) - Search suggestions
112
+ - [Crawling](./docs/crawling.md) - Website crawling
113
+ - [Flights](./docs/flights.md) - Flight search
114
+ - [Events](./docs/events.md) - Event search
115
+ - [Media](./docs/media.md) - Media search (Movies, TV, Anime)
116
+ - [News](./docs/news.md) - News search capabilities
117
+ - [Finance](./docs/finance.md) - Finance data capabilities
118
+ - [Wikipedia](./docs/wikipedia.md) - Wikipedia integration
119
+ - [HackerNews](./docs/hackernews.md) - HackerNews API
120
+ - [Webpage](./docs/scraper.md) - Web content extraction
121
+ - [Parser](./docs/parser.md) - Document and image parsing
122
+
123
+ ## Example Usage
124
+
125
+ ### Web Search
126
+
127
+ ```typescript
128
+ import { search } from "llm-search-tools";
129
+
130
+ const results = await search("typescript tutorial");
131
+ console.log(results);
132
+ ```
133
+
134
+ ### Media Search
135
+
136
+ ```typescript
137
+ import { searchMedia } from "llm-search-tools";
138
+
139
+ // Search for a movie
140
+ const movies = await searchMedia("The Matrix", { type: "movie" });
141
+ console.log(movies);
142
+
143
+ // Search for anime (uses AniDB)
144
+ const anime = await searchMedia("Cowboy Bebop", { type: "anime" });
145
+ console.log(anime);
146
+ ```
147
+
148
+ ### Autocomplete & Crawling
149
+
150
+ ```typescript
151
+ import { getSuggestions, crawl } from "llm-search-tools";
152
+
153
+ // Get search suggestions
154
+ const suggestions = await getSuggestions("best javascript framework");
155
+ console.log(suggestions.suggestions);
156
+
157
+ // Crawl a website
158
+ const pages = await crawl("https://example.com", {
159
+ maxDepth: 2,
160
+ maxPages: 10,
161
+ });
162
+ console.log(`Crawled ${pages.length} pages`);
163
+ ```
164
+
165
+ ### Specialized Search
166
+
167
+ ```typescript
168
+ import { searchFlights, searchEvents } from "llm-search-tools";
169
+
170
+ // Find flights
171
+ const flights = await searchFlights({
172
+ from: "JFK",
173
+ to: "LHR",
174
+ departureDate: "2025-06-01",
175
+ });
176
+
177
+ // Find events
178
+ const events = await searchEvents("tech conferences in San Francisco");
179
+ ```
180
+
181
+ ### Document Parsing
182
+
183
+ ```typescript
184
+ import { parse } from "llm-search-tools";
185
+
186
+ // Parse PDF
187
+ const pdfResult = await parse("document.pdf");
188
+ console.log(pdfResult.text);
189
+
190
+ // Parse CSV with options
191
+ const csvResult = await parse("data.csv", {
192
+ csv: {
193
+ delimiter: ";",
194
+ columns: true,
195
+ },
196
+ });
197
+ console.log(csvResult.data);
198
+
199
+ // OCR Image
200
+ const imageResult = await parse("image.png", {
201
+ language: "eng", // supports multiple languages
202
+ });
203
+ console.log(imageResult.text);
204
+ ```
205
+
206
+ ### Error Handling
207
+
208
+ ```typescript
209
+ try {
210
+ const result = await parse("document.pdf");
211
+ console.log(result.text);
212
+ } catch (error) {
213
+ if (error.code === "PDF_PARSE_ERROR") {
214
+ console.error("PDF parsing failed:", error.message);
215
+ }
216
+ // Handle other errors
217
+ }
218
+ ```
219
+
220
+ [![NPM Download Stats](https://nodei.co/npm/llm-search-tools.png?downloads=true)](https://www.npmjs.com/package/llm-search-tools)
221
+
222
+ ## Dependencies
223
+
224
+ This package uses these great libraries:
225
+
226
+ - [@mozilla/readability](https://www.npmjs.com/package/@mozilla/readability) - Web content extraction
227
+ - [csv-parse](https://www.npmjs.com/package/csv-parse) - CSV parsing
228
+ - [fast-xml-parser](https://www.npmjs.com/package/fast-xml-parser) - XML parsing
229
+ - [google-sr](https://www.npmjs.com/package/google-sr) - Google search
230
+ - [google-news-scraper](https://www.npmjs.com/package/google-news-scraper) - Google News search
231
+ - [jsdom](https://www.npmjs.com/package/jsdom) - DOM emulation for web scraping
232
+ - [mammoth](https://www.npmjs.com/package/mammoth) - DOCX parsing
233
+ - [pdf-parse](https://www.npmjs.com/package/pdf-parse) - PDF parsing
234
+ - [puppeteer](https://www.npmjs.com/package/puppeteer) - Headless browser automation
235
+ - [tesseract.js](https://www.npmjs.com/package/tesseract.js) - OCR
236
+ - [wikipedia](https://www.npmjs.com/package/wikipedia) - Wikipedia API
237
+
238
+ ## License
239
+
240
+ MIT
241
+
242
+ ## Contributing [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](todo.md)
243
+
244
+ Contributions are very welcome! Please read the [contributing guidelines](CONTRIBUTING.md) first.
package/dist/index.d.ts ADDED
@@ -0,0 +1,18 @@
1
+ export * from "./modules/search";
2
+ export * from "./modules/scrape";
3
+ export * from "./modules/parser";
4
+ export * from "./modules/wikipedia";
5
+ export * from "./modules/hackernews";
6
+ export * from "./modules/news";
7
+ export * from "./modules/finance";
8
+ export * from "./modules/flights";
9
+ export * from "./modules/events";
10
+ export * from "./modules/media";
11
+ export * from "./modules/crawl";
12
+ export * from "./modules/autocomplete";
13
+ export * from "./modules/common";
14
+ export * from "./types";
15
+ export declare const VERSION = "1.1.0";
16
+ export declare const AUTHOR = "Minoa";
17
+ export declare const DEFAULT_TIMEOUT = 10000;
18
+ export declare const DEFAULT_LIMIT = 10;
package/dist/index.js ADDED
@@ -0,0 +1,40 @@
1
+ "use strict";
2
+ // index.ts - main entry point for the package
3
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
4
+ if (k2 === undefined) k2 = k;
5
+ var desc = Object.getOwnPropertyDescriptor(m, k);
6
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
7
+ desc = { enumerable: true, get: function() { return m[k]; } };
8
+ }
9
+ Object.defineProperty(o, k2, desc);
10
+ }) : (function(o, m, k, k2) {
11
+ if (k2 === undefined) k2 = k;
12
+ o[k2] = m[k];
13
+ }));
14
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
15
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
16
+ };
17
+ Object.defineProperty(exports, "__esModule", { value: true });
18
+ exports.DEFAULT_LIMIT = exports.DEFAULT_TIMEOUT = exports.AUTHOR = exports.VERSION = void 0;
19
+ // export all our cool modules
20
+ __exportStar(require("./modules/search"), exports);
21
+ __exportStar(require("./modules/scrape"), exports);
22
+ __exportStar(require("./modules/parser"), exports);
23
+ __exportStar(require("./modules/wikipedia"), exports);
24
+ __exportStar(require("./modules/hackernews"), exports);
25
+ __exportStar(require("./modules/news"), exports);
26
+ __exportStar(require("./modules/finance"), exports);
27
+ __exportStar(require("./modules/flights"), exports);
28
+ __exportStar(require("./modules/events"), exports);
29
+ __exportStar(require("./modules/media"), exports);
30
+ __exportStar(require("./modules/crawl"), exports);
31
+ __exportStar(require("./modules/autocomplete"), exports);
32
+ __exportStar(require("./modules/common"), exports); // exporting common utilities might be useful for consumers too
33
+ // export types
34
+ __exportStar(require("./types"), exports);
35
+ // version info
36
+ exports.VERSION = "1.1.0";
37
+ exports.AUTHOR = "Minoa";
38
+ // default config
39
+ exports.DEFAULT_TIMEOUT = 10000;
40
+ exports.DEFAULT_LIMIT = 10;
package/dist/index.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA,8CAA8C;;;;;;;;;;;;;;;;;AAE9C,8BAA8B;AAC9B,mDAAiC;AACjC,sDAAoC;AACpC,uDAAqC;AACrC,oDAAkC;AAElC,eAAe;AACf,0CAAwB;AAExB,eAAe;AACF,QAAA,OAAO,GAAG,OAAO,CAAC;AAClB,QAAA,MAAM,GAAG,YAAY,CAAC;AAEnC,iBAAiB;AACJ,QAAA,eAAe,GAAG,KAAK,CAAC;AACxB,QAAA,aAAa,GAAG,EAAE,CAAC;AAEhC,sCAAsC;AACtC,OAAO,CAAC,KAAK,CAAC,iDAAiD,CAAC,CAAC;AACjE,OAAO,CAAC,KAAK,CAAC,YAAY,eAAO,EAAE,CAAC,CAAC"}
package/dist/integration.test.d.ts ADDED
@@ -0,0 +1 @@
1
+ export {};
package/dist/integration.test.js ADDED
@@ -0,0 +1,237 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const vitest_1 = require("vitest");
4
+ const scrape_1 = require("./modules/scrape");
5
+ const wikipedia_1 = require("./modules/wikipedia");
6
+ const hackernews_1 = require("./modules/hackernews");
7
+ const parser_1 = require("./modules/parser");
8
+ const search_1 = require("./modules/search");
9
+ const media_1 = require("./modules/media");
10
+ const fs_1 = require("fs");
11
+ const path_1 = require("path");
12
+ /**
13
+ * Comprehensive Integration Tests
14
+ * These tests demonstrate actual output from each module
15
+ */
16
+ (0, vitest_1.describe)("šŸ“¦ LLM-Kit Integration Tests", () => {
17
+ (0, vitest_1.describe)("šŸ” Search Module", () => {
18
+ vitest_1.it.skip("should search DuckDuckGo and return structured results", async () => {
19
+ // Skipped: External API, hits bot protection during automated testing
20
+ const results = await (0, search_1.searchDuckDuckGo)("typescript tutorial", { limit: 3 });
21
+ console.log("\nšŸ“Š DuckDuckGo Search Results:");
22
+ results.slice(0, 3).forEach((result, i) => {
23
+ console.log(`\n${i + 1}. ${result.title}`);
24
+ console.log(` šŸ”— ${result.url}`);
25
+ console.log(` šŸ“ ${result.snippet?.slice(0, 100)}...`);
26
+ });
27
+ (0, vitest_1.expect)(results).toBeDefined();
28
+ (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
29
+ (0, vitest_1.expect)(results[0]).toHaveProperty("title");
30
+ (0, vitest_1.expect)(results[0]).toHaveProperty("url");
31
+ (0, vitest_1.expect)(results[0]).toHaveProperty("snippet");
32
+ (0, vitest_1.expect)(results[0].source).toBe("duckduckgo");
33
+ }, 60000);
34
+ vitest_1.it.skip("should search Google and return structured results", async () => {
35
+ // Skipped: External API, hits bot protection during automated testing
36
+ const results = await (0, search_1.searchGoogle)("typescript tutorial", { limit: 3 });
37
+ console.log("\nšŸ“Š Google Search Results:");
38
+ results.slice(0, 3).forEach((result, i) => {
39
+ console.log(`\n${i + 1}. ${result.title}`);
40
+ console.log(` šŸ”— ${result.url}`);
41
+ console.log(` šŸ“ ${result.snippet?.slice(0, 100)}...`);
42
+ });
43
+ (0, vitest_1.expect)(results).toBeDefined();
44
+ (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
45
+ (0, vitest_1.expect)(results[0]).toHaveProperty("title");
46
+ (0, vitest_1.expect)(results[0]).toHaveProperty("url");
47
+ (0, vitest_1.expect)(results[0]).toHaveProperty("snippet");
48
+ (0, vitest_1.expect)(results[0].source).toBe("google");
49
+ }, 60000);
50
+ vitest_1.it.skip("should use unified search with fallback (requires internet)", async () => {
51
+ // Skipped: External API, may hit rate limits during testing
52
+ const results = await (0, search_1.search)("typescript tutorial", { limit: 3 });
53
+ console.log("\nšŸ“Š Unified Search Results Sample:");
54
+ console.log(JSON.stringify(results[0], null, 2));
55
+ (0, vitest_1.expect)(results).toBeDefined();
56
+ (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
57
+ (0, vitest_1.expect)(results[0]).toHaveProperty("title");
58
+ (0, vitest_1.expect)(results[0]).toHaveProperty("url");
59
+ (0, vitest_1.expect)(results[0]).toHaveProperty("snippet");
60
+ (0, vitest_1.expect)(results[0]).toHaveProperty("source");
61
+ }, 60000);
62
+ });
63
+ (0, vitest_1.describe)("šŸ“° Wikipedia Module", () => {
64
+ vitest_1.it.skip("should search Wikipedia and return results", async () => {
65
+ const results = await (0, wikipedia_1.wikiSearch)("Node.js");
66
+ console.log("\nšŸ“š Wikipedia Search Results:");
67
+ console.log(`Found ${results.length} results`);
68
+ console.log(`First result: ${results[0].title}`);
69
+ console.log(`Extract: ${results[0].extract?.slice(0, 100)}...`);
70
+ (0, vitest_1.expect)(results).toBeDefined();
71
+ (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
72
+ (0, vitest_1.expect)(results[0]).toHaveProperty("title");
73
+ (0, vitest_1.expect)(results[0]).toHaveProperty("url");
74
+ (0, vitest_1.expect)(results[0]).toHaveProperty("extract");
75
+ }, 15000);
76
+ vitest_1.it.skip("should get Wikipedia page content", async () => {
77
+ const content = await (0, wikipedia_1.wikiGetContent)("Node.js");
78
+ console.log("\nšŸ“„ Wikipedia Content Sample:");
79
+ console.log(`Length: ${content.length} characters`);
80
+ console.log(`Preview: ${content.slice(0, 200)}...`);
81
+ (0, vitest_1.expect)(content).toBeDefined();
82
+ (0, vitest_1.expect)(content.length).toBeGreaterThan(100);
83
+ }, 15000);
84
+ });
85
+ (0, vitest_1.describe)("šŸ—žļø HackerNews Module", () => {
86
+ (0, vitest_1.it)("should fetch top stories", async () => {
87
+ const stories = await (0, hackernews_1.getTopStories)(3);
88
+ console.log("\nšŸ”„ HackerNews Top Stories:");
89
+ stories.forEach((story, i) => {
90
+ console.log(`${i + 1}. ${story.title}`);
91
+ console.log(` šŸ‘¤ by ${story.author} | ā¬†ļø ${story.points} points | šŸ’¬ ${story.comments} comments`);
92
+ });
93
+ (0, vitest_1.expect)(stories).toBeDefined();
94
+ (0, vitest_1.expect)(stories.length).toBe(3);
95
+ (0, vitest_1.expect)(stories[0]).toHaveProperty("title");
96
+ (0, vitest_1.expect)(stories[0]).toHaveProperty("url");
97
+ (0, vitest_1.expect)(stories[0]).toHaveProperty("points");
98
+ (0, vitest_1.expect)(stories[0]).toHaveProperty("author");
99
+ }, 15000);
100
+ (0, vitest_1.it)("should fetch story by ID", async () => {
101
+ const topStories = await (0, hackernews_1.getTopStories)(1);
102
+ (0, vitest_1.expect)(topStories[0]).toBeDefined();
103
+ (0, vitest_1.expect)(topStories[0].id).toBeDefined();
104
+ if (!topStories[0].id)
105
+ return;
106
+ const story = await (0, hackernews_1.getStoryById)(topStories[0].id);
107
+ console.log("\nšŸ“– HackerNews Story Details:");
108
+ console.log(JSON.stringify(story, null, 2));
109
+ (0, vitest_1.expect)(story).toHaveProperty("title");
110
+ (0, vitest_1.expect)(story).toHaveProperty("url");
111
+ }, 15000);
112
+ });
113
+ (0, vitest_1.describe)("🌐 Scraper Module - Enhanced Features", () => {
114
+ (0, vitest_1.it)("should extract comprehensive webpage content", async () => {
115
+ const content = await (0, scrape_1.getWebpageContent)("https://example.com");
116
+ console.log("\nšŸŽÆ Webpage Extraction Results:");
117
+ console.log(`Title: ${content.title}`);
118
+ console.log(`Site: ${content.siteName || "N/A"}`);
119
+ console.log(`Favicon: ${content.favicon || "N/A"}`);
120
+ console.log(`Images: ${content.imageUrls?.length || 0} found`);
121
+ console.log(`Text Length: ${content.textContent.length} chars`);
122
+ console.log(`Markdown Length: ${content.markdown?.length || 0} chars`);
123
+ console.log(`\nText Preview: ${content.textContent.slice(0, 150)}...`);
124
+ console.log(`\nMarkdown Preview:\n${content.markdown?.slice(0, 200)}...`);
125
+ (0, vitest_1.expect)(content).toHaveProperty("title");
126
+ (0, vitest_1.expect)(content).toHaveProperty("content");
127
+ (0, vitest_1.expect)(content).toHaveProperty("textContent");
128
+ (0, vitest_1.expect)(content).toHaveProperty("markdown");
129
+ (0, vitest_1.expect)(content).toHaveProperty("favicon");
130
+ (0, vitest_1.expect)(content).toHaveProperty("imageUrls");
131
+ (0, vitest_1.expect)(content).toHaveProperty("rawHtml");
132
+ }, 30000);
133
+ vitest_1.it.skip("should handle Wikipedia URLs with image extraction", async () => {
134
+ const content = await (0, scrape_1.getWebpageContent)("https://en.wikipedia.org/wiki/TypeScript");
135
+ console.log("\nšŸ“š Wikipedia Page Extraction:");
136
+ console.log(`Title: ${content.title}`);
137
+ console.log(`Images found: ${content.imageUrls?.length || 0}`);
138
+ if (content.imageUrls && content.imageUrls.length > 0) {
139
+ console.log(`Sample images:`);
140
+ content.imageUrls.slice(0, 3).forEach((img, i) => {
141
+ console.log(` ${i + 1}. ${img}`);
142
+ });
143
+ }
144
+ (0, vitest_1.expect)(content.siteName).toBe("Wikipedia");
145
+ (0, vitest_1.expect)(content.markdown).toBeDefined();
146
+ }, 30000);
147
+ });
148
+ (0, vitest_1.describe)("šŸ“„ Parser Module - File Support", () => {
149
+ (0, vitest_1.it)("should parse various file types", async () => {
150
+ const testData = "Sample,CSV,Data\n1,2,3\n4,5,6";
151
+ const csvPath = (0, path_1.join)(process.cwd(), "test-sample.csv");
152
+ // Create a temporary CSV for testing
153
+ (0, fs_1.writeFileSync)(csvPath, testData);
154
+ try {
155
+ const result = await (0, parser_1.parse)(csvPath);
156
+ console.log("\nšŸ“Š CSV Parser Output:");
157
+ console.log(`Type: ${result.type}`);
158
+ console.log(`Text: ${result.text}`);
159
+ console.log(`Rows: ${result.metadata?.rowCount || "N/A"}`);
160
+ (0, vitest_1.expect)(result.type).toBe("csv");
161
+ (0, vitest_1.expect)(result.text).toContain("Sample");
162
+ }
163
+ finally {
164
+ // Cleanup
165
+ (0, fs_1.unlinkSync)(csvPath);
166
+ }
167
+ });
168
+ (0, vitest_1.it)("should handle plain text files", async () => {
169
+ const testText = "This is a test text file.\nWith multiple lines.\n";
170
+ const txtPath = (0, path_1.join)(process.cwd(), "test-sample.txt");
171
+ (0, fs_1.writeFileSync)(txtPath, testText);
172
+ try {
173
+ const result = await (0, parser_1.parse)(txtPath);
174
+ console.log("\nšŸ“ Text Parser Output:");
175
+ console.log(`Type: ${result.type}`);
176
+ console.log(`Content: ${result.text}`);
177
+ (0, vitest_1.expect)(result.type).toBe("text");
178
+ (0, vitest_1.expect)(result.text).toBe(testText);
179
+ }
180
+ finally {
181
+ (0, fs_1.unlinkSync)(txtPath);
182
+ }
183
+ });
184
+ (0, vitest_1.it)("should parse JSON files", async () => {
185
+ const testJson = { name: "Test", value: 42, nested: { key: "value" } };
186
+ const jsonPath = (0, path_1.join)(process.cwd(), "test-sample.json");
187
+ (0, fs_1.writeFileSync)(jsonPath, JSON.stringify(testJson, null, 2));
188
+ try {
189
+ const result = await (0, parser_1.parse)(jsonPath);
190
+ console.log("\nšŸ”§ JSON Parser Output:");
191
+ console.log(`Type: ${result.type}`);
192
+ console.log(`Data:`, result.data);
193
+ (0, vitest_1.expect)(result.type).toBe("json");
194
+ (0, vitest_1.expect)(result.data).toEqual(testJson);
195
+ }
196
+ finally {
197
+ (0, fs_1.unlinkSync)(jsonPath);
198
+ }
199
+ });
200
+ });
201
+ (0, vitest_1.describe)("šŸŽ¬ Media Module", () => {
202
+ vitest_1.it.skip("should search for movies/tv/anime", async () => {
203
+ // Skipped: External scraping, avoids hitting rate limits/blocks in automated tests
204
+ const results = await (0, media_1.searchMedia)("Breaking Bad", { type: "tv", limit: 1 });
205
+ console.log("\nšŸŽ¬ Media Search Results:");
206
+ if (results.length > 0) {
207
+ const show = results[0];
208
+ console.log(`Title: ${show.title}`);
209
+ console.log(`URL: ${show.url}`);
210
+ console.log(`Rating: ${show.rating}`);
211
+ console.log(`Description: ${show.description?.slice(0, 100)}...`);
212
+ }
213
+ (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
214
+ (0, vitest_1.expect)(results[0].title).toContain("Breaking Bad");
215
+ (0, vitest_1.expect)(results[0].mediaType).toBe("tv");
216
+ }, 30000);
217
+ });
218
+ (0, vitest_1.describe)("šŸŽØ Output Format Examples", () => {
219
+ (0, vitest_1.it)("should demonstrate WebpageContent structure", async () => {
220
+ const content = await (0, scrape_1.getWebpageContent)("https://example.com");
221
+ console.log("\nšŸ“‹ Complete WebpageContent Structure:");
222
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
223
+ console.log(JSON.stringify({
224
+ title: content.title,
225
+ siteName: content.siteName,
226
+ favicon: content.favicon,
227
+ excerpt: content.excerpt?.slice(0, 100),
228
+ textContentLength: content.textContent.length,
229
+ markdownLength: content.markdown?.length,
230
+ imageCount: content.imageUrls?.length,
231
+ sampleImage: content.imageUrls?.[0],
232
+ }, null, 2));
233
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
234
+ (0, vitest_1.expect)(content).toBeDefined();
235
+ }, 30000);
236
+ });
237
+ });
package/dist/modules/answerbox.test.d.ts ADDED
@@ -0,0 +1 @@
1
+ export {};
package/dist/modules/answerbox.test.js ADDED
@@ -0,0 +1,105 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const vitest_1 = require("vitest");
4
+ const jsdom_1 = require("jsdom");
5
+ const google_1 = require("./scrapers/google");
6
+ const duckduckgo_1 = require("./scrapers/duckduckgo");
7
+ (0, vitest_1.describe)("Answer Box Extraction", () => {
8
+ (0, vitest_1.describe)("Google Answer Box", () => {
9
+ (0, vitest_1.it)("should extract featured snippet text (.hgKElc)", () => {
10
+ const html = `
11
+ <div class="hgKElc">
12
+ This is a featured snippet text.
13
+ </div>
14
+ `;
15
+ const dom = new jsdom_1.JSDOM(html);
16
+ const result = (0, google_1.extractAnswerBox)(dom.window.document);
17
+ (0, vitest_1.expect)(result).toBe("This is a featured snippet text.");
18
+ });
19
+ (0, vitest_1.it)("should extract list snippet (.LGOjhe)", () => {
20
+ const html = `
21
+ <div class="LGOjhe">
22
+ List item 1
23
+ List item 2
24
+ </div>
25
+ `;
26
+ const dom = new jsdom_1.JSDOM(html);
27
+ const result = (0, google_1.extractAnswerBox)(dom.window.document);
28
+ (0, vitest_1.expect)(result).toBe("List item 1\n List item 2");
29
+ });
30
+ (0, vitest_1.it)("should extract direct answer (.Z0LcW)", () => {
31
+ const html = `
32
+ <div class="Z0LcW">
33
+ 42
34
+ </div>
35
+ `;
36
+ const dom = new jsdom_1.JSDOM(html);
37
+ const result = (0, google_1.extractAnswerBox)(dom.window.document);
38
+ (0, vitest_1.expect)(result).toBe("42");
39
+ });
40
+ (0, vitest_1.it)("should extract knowledge panel description (.kno-rdesc span)", () => {
41
+ const html = `
42
+ <div class="kno-rdesc">
43
+ <span>A description of an entity.</span>
44
+ </div>
45
+ `;
46
+ const dom = new jsdom_1.JSDOM(html);
47
+ const result = (0, google_1.extractAnswerBox)(dom.window.document);
48
+ (0, vitest_1.expect)(result).toBe("A description of an entity.");
49
+ });
50
+ (0, vitest_1.it)("should return undefined if no answer box found", () => {
51
+ const html = `<div>Just a regular search result page</div>`;
52
+ const dom = new jsdom_1.JSDOM(html);
53
+ const result = (0, google_1.extractAnswerBox)(dom.window.document);
54
+ (0, vitest_1.expect)(result).toBeUndefined();
55
+ });
56
+ });
57
+ (0, vitest_1.describe)("DuckDuckGo Answer Box", () => {
58
+ (0, vitest_1.it)("should extract abstract (.module__text)", () => {
59
+ const html = `
60
+ <div class="module__text">
61
+ Abstract content from Wikipedia usually.
62
+ </div>
63
+ `;
64
+ const dom = new jsdom_1.JSDOM(html);
65
+ const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
66
+ (0, vitest_1.expect)(result).toBe("Abstract content from Wikipedia usually.");
67
+ });
68
+ (0, vitest_1.it)("should extract definition (.zci__def__text)", () => {
69
+ const html = `
70
+ <div class="zci__def__text">
71
+ Definition of a word.
72
+ </div>
73
+ `;
74
+ const dom = new jsdom_1.JSDOM(html);
75
+ const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
76
+ (0, vitest_1.expect)(result).toBe("Definition of a word.");
77
+ });
78
+ (0, vitest_1.it)("should extract calculator (.c-base__title)", () => {
79
+ const html = `
80
+ <div class="c-base__title">
81
+ 1 + 1 = 2
82
+ </div>
83
+ `;
84
+ const dom = new jsdom_1.JSDOM(html);
85
+ const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
86
+ (0, vitest_1.expect)(result).toBe("1 + 1 = 2");
87
+ });
88
+ (0, vitest_1.it)("should extract generic fact (.zci__body)", () => {
89
+ const html = `
90
+ <div class="zci__body">
91
+ A generic fact about something.
92
+ </div>
93
+ `;
94
+ const dom = new jsdom_1.JSDOM(html);
95
+ const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
96
+ (0, vitest_1.expect)(result).toBe("A generic fact about something.");
97
+ });
98
+ (0, vitest_1.it)("should return undefined if no answer box found", () => {
99
+ const html = `<div>Just a regular search result page</div>`;
100
+ const dom = new jsdom_1.JSDOM(html);
101
+ const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
102
+ (0, vitest_1.expect)(result).toBeUndefined();
103
+ });
104
+ });
105
+ });