npm - llm-search-tools - Versions diffs - 1.1.0 - Mend

llm-search-tools 1.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (126)

package/LICENSE +21 -0
package/README.md +244 -0
package/dist/index.d.ts +18 -0
package/dist/index.js +40 -0
package/dist/index.js.map +1 -0
package/dist/integration.test.d.ts +1 -0
package/dist/integration.test.js +237 -0
package/dist/modules/answerbox.test.d.ts +1 -0
package/dist/modules/answerbox.test.js +105 -0
package/dist/modules/autocomplete.d.ts +11 -0
package/dist/modules/autocomplete.js +159 -0
package/dist/modules/autocomplete.test.d.ts +1 -0
package/dist/modules/autocomplete.test.js +188 -0
package/dist/modules/common.d.ts +26 -0
package/dist/modules/common.js +263 -0
package/dist/modules/common.test.d.ts +1 -0
package/dist/modules/common.test.js +87 -0
package/dist/modules/crawl.d.ts +9 -0
package/dist/modules/crawl.js +117 -0
package/dist/modules/crawl.test.d.ts +1 -0
package/dist/modules/crawl.test.js +48 -0
package/dist/modules/events.d.ts +8 -0
package/dist/modules/events.js +129 -0
package/dist/modules/events.test.d.ts +1 -0
package/dist/modules/events.test.js +104 -0
package/dist/modules/finance.d.ts +10 -0
package/dist/modules/finance.js +20 -0
package/dist/modules/finance.test.d.ts +1 -0
package/dist/modules/finance.test.js +77 -0
package/dist/modules/flights.d.ts +8 -0
package/dist/modules/flights.js +135 -0
package/dist/modules/flights.test.d.ts +1 -0
package/dist/modules/flights.test.js +128 -0
package/dist/modules/hackernews.d.ts +8 -0
package/dist/modules/hackernews.js +87 -0
package/dist/modules/hackernews.js.map +1 -0
package/dist/modules/images.test.d.ts +1 -0
package/dist/modules/images.test.js +145 -0
package/dist/modules/integrations.test.d.ts +1 -0
package/dist/modules/integrations.test.js +93 -0
package/dist/modules/media.d.ts +11 -0
package/dist/modules/media.js +132 -0
package/dist/modules/media.test.d.ts +1 -0
package/dist/modules/media.test.js +186 -0
package/dist/modules/news.d.ts +3 -0
package/dist/modules/news.js +39 -0
package/dist/modules/news.test.d.ts +1 -0
package/dist/modules/news.test.js +88 -0
package/dist/modules/parser.d.ts +19 -0
package/dist/modules/parser.js +361 -0
package/dist/modules/parser.test.d.ts +1 -0
package/dist/modules/parser.test.js +151 -0
package/dist/modules/reddit.d.ts +21 -0
package/dist/modules/reddit.js +107 -0
package/dist/modules/scrape.d.ts +16 -0
package/dist/modules/scrape.js +272 -0
package/dist/modules/scrape.test.d.ts +1 -0
package/dist/modules/scrape.test.js +232 -0
package/dist/modules/scraper.d.ts +12 -0
package/dist/modules/scraper.js +640 -0
package/dist/modules/scrapers/anidb.d.ts +8 -0
package/dist/modules/scrapers/anidb.js +156 -0
package/dist/modules/scrapers/duckduckgo.d.ts +6 -0
package/dist/modules/scrapers/duckduckgo.js +284 -0
package/dist/modules/scrapers/google-news.d.ts +2 -0
package/dist/modules/scrapers/google-news.js +60 -0
package/dist/modules/scrapers/google.d.ts +6 -0
package/dist/modules/scrapers/google.js +211 -0
package/dist/modules/scrapers/searxng.d.ts +2 -0
package/dist/modules/scrapers/searxng.js +93 -0
package/dist/modules/scrapers/thetvdb.d.ts +3 -0
package/dist/modules/scrapers/thetvdb.js +147 -0
package/dist/modules/scrapers/tmdb.d.ts +3 -0
package/dist/modules/scrapers/tmdb.js +172 -0
package/dist/modules/scrapers/yahoo-finance.d.ts +2 -0
package/dist/modules/scrapers/yahoo-finance.js +33 -0
package/dist/modules/search.d.ts +5 -0
package/dist/modules/search.js +45 -0
package/dist/modules/search.js.map +1 -0
package/dist/modules/search.test.d.ts +1 -0
package/dist/modules/search.test.js +219 -0
package/dist/modules/urbandictionary.d.ts +12 -0
package/dist/modules/urbandictionary.js +26 -0
package/dist/modules/webpage.d.ts +4 -0
package/dist/modules/webpage.js +150 -0
package/dist/modules/webpage.js.map +1 -0
package/dist/modules/wikipedia.d.ts +5 -0
package/dist/modules/wikipedia.js +85 -0
package/dist/modules/wikipedia.js.map +1 -0
package/dist/scripts/interactive-search.d.ts +1 -0
package/dist/scripts/interactive-search.js +98 -0
package/dist/test.d.ts +1 -0
package/dist/test.js +179 -0
package/dist/test.js.map +1 -0
package/dist/testBraveSearch.d.ts +1 -0
package/dist/testBraveSearch.js +34 -0
package/dist/testDuckDuckGo.d.ts +1 -0
package/dist/testDuckDuckGo.js +52 -0
package/dist/testEcosia.d.ts +1 -0
package/dist/testEcosia.js +57 -0
package/dist/testSearchModule.d.ts +1 -0
package/dist/testSearchModule.js +95 -0
package/dist/testwebpage.d.ts +1 -0
package/dist/testwebpage.js +81 -0
package/dist/types.d.ts +174 -0
package/dist/types.js +3 -0
package/dist/types.js.map +1 -0
package/dist/utils/createTestDocx.d.ts +1 -0
package/dist/utils/createTestDocx.js +58 -0
package/dist/utils/htmlcleaner.d.ts +20 -0
package/dist/utils/htmlcleaner.js +172 -0
package/docs/README.md +275 -0
package/docs/autocomplete.md +73 -0
package/docs/crawling.md +88 -0
package/docs/events.md +58 -0
package/docs/examples.md +158 -0
package/docs/finance.md +60 -0
package/docs/flights.md +71 -0
package/docs/hackernews.md +121 -0
package/docs/media.md +87 -0
package/docs/news.md +75 -0
package/docs/parser.md +197 -0
package/docs/scraper.md +347 -0
package/docs/search.md +106 -0
package/docs/wikipedia.md +91 -0
package/package.json +97 -0

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 llm-search-tools
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,244 @@
+# llm-search-tools 🔍
+[![npm version](https://badge.fury.io/js/llm-search-tools.svg)](https://badge.fury.io/js/llm-search-tools)
+> A Node.js module for searching and scraping web content, designed for LLMs but useful for everyone!
+## Features
+- Search multiple engines (Google, DuckDuckGo, SearxNG)
+- **Autocomplete/Suggestions** (Google, DuckDuckGo, Brave, etc.)
+- **Website Crawling** (Multi-page extraction with Cheerio/Puppeteer)
+- **Flight Search** (Google Flights)
+- **Event Search** (Google Events)
+- News search (Google News, DuckDuckGo)
+- Finance data (Yahoo Finance)
+- Wikipedia search and content extraction
+- Media search (Movies, TV, Anime) via TMDB, AniDB, TheTVDB
+- HackerNews scraping
+- Webpage content extraction
+- Document parsing (PDF, DOCX, CSV)
+- Image OCR/text extraction support
+- **No API keys required at all**
+- Automatic fallbacks
+- TypeScript & Node support
+## Installation
+```bash
+npm install llm-search-tools
+# Optional: Install OCR language data for non-English languages
+npm install tesseract.js-data
+```
+## Quick Start
+```typescript
+import {
+  search,
+  parse,
+  searchNews,
+  searchMedia,
+  getQuote,
+  getSuggestions,
+  crawl,
+} from "llm-search-tools";
+// Web Search
+const results = await search("typescript tutorial");
+console.log(results);
+// Autocomplete
+const suggestions = await getSuggestions("react");
+console.log(suggestions.suggestions);
+// Crawl Website
+const pages = await crawl("https://example.com", { maxDepth: 1 });
+console.log(pages[0].textContent);
+// News Search
+const news = await searchNews("technology trends");
+console.log(news);
+// Media Search
+const movies = await searchMedia("Inception", { type: "movie" });
+console.log(movies);
+// Finance Data
+const quote = await getQuote("AAPL");
+console.log(quote);
+// Parse Documents
+const pdfResult = await parse("document.pdf");
+console.log(pdfResult.text);
+const csvResult = await parse("data.csv", {
+  csv: { columns: true },
+});
+console.log(csvResult.data);
+// OCR Images
+const imageResult = await parse("image.png", {
+  language: "eng",
+});
+console.log(imageResult.text);
+```
+## Supported File Types
+### Documents
+- PDF files (`.pdf`)
+- Word documents (`.docx`)
+- CSV files (`.csv`)
+- XML files (`.xml`)
+- JSON files (`.json`)
+- Text files (`.txt`, `.md`, etc.)
+### Images (OCR)
+- PNG (`.png`)
+- JPEG (`.jpg`, `.jpeg`)
+- BMP (`.bmp`)
+- GIF (`.gif`)
+## Documentation
+See the [docs](./docs) directory for detailed documentation:
+- [Search](./docs/search.md) - Web search capabilities
+- [Autocomplete](./docs/autocomplete.md) - Search suggestions
+- [Crawling](./docs/crawling.md) - Website crawling
+- [Flights](./docs/flights.md) - Flight search
+- [Events](./docs/events.md) - Event search
+- [Media](./docs/media.md) - Media search (Movies, TV, Anime)
+- [News](./docs/news.md) - News search capabilities
+- [Finance](./docs/finance.md) - Finance data capabilities
+- [Wikipedia](./docs/wikipedia.md) - Wikipedia integration
+- [HackerNews](./docs/hackernews.md) - HackerNews API
+- [Webpage](./docs/scraper.md) - Web content extraction
+- [Parser](./docs/parser.md) - Document and image parsing
+## Example Usage
+### Web Search
+```typescript
+import { search } from "llm-search-tools";
+const results = await search("typescript tutorial");
+console.log(results);
+```
+### Media Search
+```typescript
+import { searchMedia } from "llm-search-tools";
+// Search for a movie
+const movies = await searchMedia("The Matrix", { type: "movie" });
+console.log(movies);
+// Search for anime (uses AniDB)
+const anime = await searchMedia("Cowboy Bebop", { type: "anime" });
+console.log(anime);
+```
+### Autocomplete & Crawling
+```typescript
+import { getSuggestions, crawl } from "llm-search-tools";
+// Get search suggestions
+const suggestions = await getSuggestions("best javascript framework");
+console.log(suggestions.suggestions);
+// Crawl a website
+const pages = await crawl("https://example.com", {
+  maxDepth: 2,
+  maxPages: 10,
+});
+console.log(`Crawled ${pages.length} pages`);
+```
+### Specialized Search
+```typescript
+import { searchFlights, searchEvents } from "llm-search-tools";
+// Find flights
+const flights = await searchFlights({
+  from: "JFK",
+  to: "LHR",
+  departureDate: "2025-06-01",
+});
+// Find events
+const events = await searchEvents("tech conferences in San Francisco");
+```
+### Document Parsing
+```typescript
+import { parse } from "llm-search-tools";
+// Parse PDF
+const pdfResult = await parse("document.pdf");
+console.log(pdfResult.text);
+// Parse CSV with options
+const csvResult = await parse("data.csv", {
+  csv: {
+    delimiter: ";",
+    columns: true,
+  },
+});
+console.log(csvResult.data);
+// OCR Image
+const imageResult = await parse("image.png", {
+  language: "eng", // supports multiple languages
+});
+console.log(imageResult.text);
+```
+### Error Handling
+```typescript
+try {
+  const result = await parse("document.pdf");
+  console.log(result.text);
+} catch (error) {
+  if (error.code === "PDF_PARSE_ERROR") {
+    console.error("PDF parsing failed:", error.message);
+  }
+  // Handle other errors
+}
+```
+[![NPM Download Stats](https://nodei.co/npm/llm-search-tools.png?downloads=true)](https://www.npmjs.com/package/llm-search-tools)
+## Dependencies
+This package uses these great libraries:
+- [@mozilla/readability](https://www.npmjs.com/package/@mozilla/readability) - Web content extraction
+- [csv-parse](https://www.npmjs.com/package/csv-parse) - CSV parsing
+- [fast-xml-parser](https://www.npmjs.com/package/fast-xml-parser) - XML parsing
+- [google-sr](https://www.npmjs.com/package/google-sr) - Google search
+- [google-news-scraper](https://www.npmjs.com/package/google-news-scraper) - Google News search
+- [jsdom](https://www.npmjs.com/package/jsdom) - DOM emulation for web scraping
+- [mammoth](https://www.npmjs.com/package/mammoth) - DOCX parsing
+- [pdf-parse](https://www.npmjs.com/package/pdf-parse) - PDF parsing
+- [puppeteer](https://www.npmjs.com/package/puppeteer) - Headless browser automation
+- [tesseract.js](https://www.npmjs.com/package/tesseract.js) - OCR
+- [wikipedia](https://www.npmjs.com/package/wikipedia) - Wikipedia API
+## License
+MIT
+## Contributing [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](todo.md)
+Contributions VERY welcome!! Please read the [contributing guidelines](CONTRIBUTING.md) first.

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,18 @@
+export * from "./modules/search";
+export * from "./modules/scrape";
+export * from "./modules/parser";
+export * from "./modules/wikipedia";
+export * from "./modules/hackernews";
+export * from "./modules/news";
+export * from "./modules/finance";
+export * from "./modules/flights";
+export * from "./modules/events";
+export * from "./modules/media";
+export * from "./modules/crawl";
+export * from "./modules/autocomplete";
+export * from "./modules/common";
+export * from "./types";
+export declare const VERSION = "1.1.0";
+export declare const AUTHOR = "Minoa";
+export declare const DEFAULT_TIMEOUT = 10000;
+export declare const DEFAULT_LIMIT = 10;

package/dist/index.js ADDED Viewed

@@ -0,0 +1,40 @@
+"use strict";
+// index.ts - main entry point for the package
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.DEFAULT_LIMIT = exports.DEFAULT_TIMEOUT = exports.AUTHOR = exports.VERSION = void 0;
+// export all our cool modules
+__exportStar(require("./modules/search"), exports);
+__exportStar(require("./modules/scrape"), exports);
+__exportStar(require("./modules/parser"), exports);
+__exportStar(require("./modules/wikipedia"), exports);
+__exportStar(require("./modules/hackernews"), exports);
+__exportStar(require("./modules/news"), exports);
+__exportStar(require("./modules/finance"), exports);
+__exportStar(require("./modules/flights"), exports);
+__exportStar(require("./modules/events"), exports);
+__exportStar(require("./modules/media"), exports);
+__exportStar(require("./modules/crawl"), exports);
+__exportStar(require("./modules/autocomplete"), exports);
+__exportStar(require("./modules/common"), exports); // exporting common utilities might be useful for consumers too
+// export types
+__exportStar(require("./types"), exports);
+// version info
+exports.VERSION = "1.1.0";
+exports.AUTHOR = "Minoa";
+// default config
+exports.DEFAULT_TIMEOUT = 10000;
+exports.DEFAULT_LIMIT = 10;

package/dist/index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA,8CAA8C;;;;;;;;;;;;;;;;;AAE9C,8BAA8B;AAC9B,mDAAiC;AACjC,sDAAoC;AACpC,uDAAqC;AACrC,oDAAkC;AAElC,eAAe;AACf,0CAAwB;AAExB,eAAe;AACF,QAAA,OAAO,GAAG,OAAO,CAAC;AAClB,QAAA,MAAM,GAAG,YAAY,CAAC;AAEnC,iBAAiB;AACJ,QAAA,eAAe,GAAG,KAAK,CAAC;AACxB,QAAA,aAAa,GAAG,EAAE,CAAC;AAEhC,sCAAsC;AACtC,OAAO,CAAC,KAAK,CAAC,iDAAiD,CAAC,CAAC;AACjE,OAAO,CAAC,KAAK,CAAC,YAAY,eAAO,EAAE,CAAC,CAAC"}

package/dist/integration.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/integration.test.js ADDED Viewed

@@ -0,0 +1,237 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const vitest_1 = require("vitest");
+const scrape_1 = require("./modules/scrape");
+const wikipedia_1 = require("./modules/wikipedia");
+const hackernews_1 = require("./modules/hackernews");
+const parser_1 = require("./modules/parser");
+const search_1 = require("./modules/search");
+const media_1 = require("./modules/media");
+const fs_1 = require("fs");
+const path_1 = require("path");
+/**
+ * Comprehensive Integration Tests
+ * These tests demonstrate actual output from each module
+ */
+(0, vitest_1.describe)("📦 LLM-Kit Integration Tests", () => {
+    (0, vitest_1.describe)("🔍 Search Module", () => {
+        vitest_1.it.skip("should search DuckDuckGo and return structured results", async () => {
+            // Skipped: External API, hits bot protection during automated testing
+            const results = await (0, search_1.searchDuckDuckGo)("typescript tutorial", { limit: 3 });
+            console.log("\n📊 DuckDuckGo Search Results:");
+            results.slice(0, 3).forEach((result, i) => {
+                console.log(`\n${i + 1}. ${result.title}`);
+                console.log(`   🔗 ${result.url}`);
+                console.log(`   📝 ${result.snippet?.slice(0, 100)}...`);
+            });
+            (0, vitest_1.expect)(results).toBeDefined();
+            (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
+            (0, vitest_1.expect)(results[0]).toHaveProperty("title");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("url");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("snippet");
+            (0, vitest_1.expect)(results[0].source).toBe("duckduckgo");
+        }, 60000);
+        vitest_1.it.skip("should search Google and return structured results", async () => {
+            // Skipped: External API, hits bot protection during automated testing
+            const results = await (0, search_1.searchGoogle)("typescript tutorial", { limit: 3 });
+            console.log("\n📊 Google Search Results:");
+            results.slice(0, 3).forEach((result, i) => {
+                console.log(`\n${i + 1}. ${result.title}`);
+                console.log(`   🔗 ${result.url}`);
+                console.log(`   📝 ${result.snippet?.slice(0, 100)}...`);
+            });
+            (0, vitest_1.expect)(results).toBeDefined();
+            (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
+            (0, vitest_1.expect)(results[0]).toHaveProperty("title");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("url");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("snippet");
+            (0, vitest_1.expect)(results[0].source).toBe("google");
+        }, 60000);
+        vitest_1.it.skip("should use unified search with fallback (requires internet)", async () => {
+            // Skipped: External API, may hit rate limits during testing
+            const results = await (0, search_1.search)("typescript tutorial", { limit: 3 });
+            console.log("\n📊 Unified Search Results Sample:");
+            console.log(JSON.stringify(results[0], null, 2));
+            (0, vitest_1.expect)(results).toBeDefined();
+            (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
+            (0, vitest_1.expect)(results[0]).toHaveProperty("title");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("url");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("snippet");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("source");
+        }, 60000);
+    });
+    (0, vitest_1.describe)("📰 Wikipedia Module", () => {
+        vitest_1.it.skip("should search Wikipedia and return results", async () => {
+            const results = await (0, wikipedia_1.wikiSearch)("Node.js");
+            console.log("\n📚 Wikipedia Search Results:");
+            console.log(`Found ${results.length} results`);
+            console.log(`First result: ${results[0].title}`);
+            console.log(`Extract: ${results[0].extract?.slice(0, 100)}...`);
+            (0, vitest_1.expect)(results).toBeDefined();
+            (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
+            (0, vitest_1.expect)(results[0]).toHaveProperty("title");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("url");
+            (0, vitest_1.expect)(results[0]).toHaveProperty("extract");
+        }, 15000);
+        vitest_1.it.skip("should get Wikipedia page content", async () => {
+            const content = await (0, wikipedia_1.wikiGetContent)("Node.js");
+            console.log("\n📄 Wikipedia Content Sample:");
+            console.log(`Length: ${content.length} characters`);
+            console.log(`Preview: ${content.slice(0, 200)}...`);
+            (0, vitest_1.expect)(content).toBeDefined();
+            (0, vitest_1.expect)(content.length).toBeGreaterThan(100);
+        }, 15000);
+    });
+    (0, vitest_1.describe)("🗞️ HackerNews Module", () => {
+        (0, vitest_1.it)("should fetch top stories", async () => {
+            const stories = await (0, hackernews_1.getTopStories)(3);
+            console.log("\n🔥 HackerNews Top Stories:");
+            stories.forEach((story, i) => {
+                console.log(`${i + 1}. ${story.title}`);
+                console.log(`   👤 by ${story.author} | ⬆️ ${story.points} points | 💬 ${story.comments} comments`);
+            });
+            (0, vitest_1.expect)(stories).toBeDefined();
+            (0, vitest_1.expect)(stories.length).toBe(3);
+            (0, vitest_1.expect)(stories[0]).toHaveProperty("title");
+            (0, vitest_1.expect)(stories[0]).toHaveProperty("url");
+            (0, vitest_1.expect)(stories[0]).toHaveProperty("points");
+            (0, vitest_1.expect)(stories[0]).toHaveProperty("author");
+        }, 15000);
+        (0, vitest_1.it)("should fetch story by ID", async () => {
+            const topStories = await (0, hackernews_1.getTopStories)(1);
+            (0, vitest_1.expect)(topStories[0]).toBeDefined();
+            (0, vitest_1.expect)(topStories[0].id).toBeDefined();
+            if (!topStories[0].id)
+                return;
+            const story = await (0, hackernews_1.getStoryById)(topStories[0].id);
+            console.log("\n📖 HackerNews Story Details:");
+            console.log(JSON.stringify(story, null, 2));
+            (0, vitest_1.expect)(story).toHaveProperty("title");
+            (0, vitest_1.expect)(story).toHaveProperty("url");
+        }, 15000);
+    });
+    (0, vitest_1.describe)("🌐 Scraper Module - Enhanced Features", () => {
+        (0, vitest_1.it)("should extract comprehensive webpage content", async () => {
+            const content = await (0, scrape_1.getWebpageContent)("https://example.com");
+            console.log("\n🎯 Webpage Extraction Results:");
+            console.log(`Title: ${content.title}`);
+            console.log(`Site: ${content.siteName || "N/A"}`);
+            console.log(`Favicon: ${content.favicon || "N/A"}`);
+            console.log(`Images: ${content.imageUrls?.length || 0} found`);
+            console.log(`Text Length: ${content.textContent.length} chars`);
+            console.log(`Markdown Length: ${content.markdown?.length || 0} chars`);
+            console.log(`\nText Preview: ${content.textContent.slice(0, 150)}...`);
+            console.log(`\nMarkdown Preview:\n${content.markdown?.slice(0, 200)}...`);
+            (0, vitest_1.expect)(content).toHaveProperty("title");
+            (0, vitest_1.expect)(content).toHaveProperty("content");
+            (0, vitest_1.expect)(content).toHaveProperty("textContent");
+            (0, vitest_1.expect)(content).toHaveProperty("markdown");
+            (0, vitest_1.expect)(content).toHaveProperty("favicon");
+            (0, vitest_1.expect)(content).toHaveProperty("imageUrls");
+            (0, vitest_1.expect)(content).toHaveProperty("rawHtml");
+        }, 30000);
+        vitest_1.it.skip("should handle Wikipedia URLs with image extraction", async () => {
+            const content = await (0, scrape_1.getWebpageContent)("https://en.wikipedia.org/wiki/TypeScript");
+            console.log("\n📚 Wikipedia Page Extraction:");
+            console.log(`Title: ${content.title}`);
+            console.log(`Images found: ${content.imageUrls?.length || 0}`);
+            if (content.imageUrls && content.imageUrls.length > 0) {
+                console.log(`Sample images:`);
+                content.imageUrls.slice(0, 3).forEach((img, i) => {
+                    console.log(`  ${i + 1}. ${img}`);
+                });
+            }
+            (0, vitest_1.expect)(content.siteName).toBe("Wikipedia");
+            (0, vitest_1.expect)(content.markdown).toBeDefined();
+        }, 30000);
+    });
+    (0, vitest_1.describe)("📄 Parser Module - File Support", () => {
+        (0, vitest_1.it)("should parse various file types", async () => {
+            const testData = "Sample,CSV,Data\n1,2,3\n4,5,6";
+            const csvPath = (0, path_1.join)(process.cwd(), "test-sample.csv");
+            // Create a temporary CSV for testing
+            (0, fs_1.writeFileSync)(csvPath, testData);
+            try {
+                const result = await (0, parser_1.parse)(csvPath);
+                console.log("\n📊 CSV Parser Output:");
+                console.log(`Type: ${result.type}`);
+                console.log(`Text: ${result.text}`);
+                console.log(`Rows: ${result.metadata?.rowCount || "N/A"}`);
+                (0, vitest_1.expect)(result.type).toBe("csv");
+                (0, vitest_1.expect)(result.text).toContain("Sample");
+            }
+            finally {
+                // Cleanup
+                (0, fs_1.unlinkSync)(csvPath);
+            }
+        });
+        (0, vitest_1.it)("should handle plain text files", async () => {
+            const testText = "This is a test text file.\nWith multiple lines.\n";
+            const txtPath = (0, path_1.join)(process.cwd(), "test-sample.txt");
+            (0, fs_1.writeFileSync)(txtPath, testText);
+            try {
+                const result = await (0, parser_1.parse)(txtPath);
+                console.log("\n📝 Text Parser Output:");
+                console.log(`Type: ${result.type}`);
+                console.log(`Content: ${result.text}`);
+                (0, vitest_1.expect)(result.type).toBe("text");
+                (0, vitest_1.expect)(result.text).toBe(testText);
+            }
+            finally {
+                (0, fs_1.unlinkSync)(txtPath);
+            }
+        });
+        (0, vitest_1.it)("should parse JSON files", async () => {
+            const testJson = { name: "Test", value: 42, nested: { key: "value" } };
+            const jsonPath = (0, path_1.join)(process.cwd(), "test-sample.json");
+            (0, fs_1.writeFileSync)(jsonPath, JSON.stringify(testJson, null, 2));
+            try {
+                const result = await (0, parser_1.parse)(jsonPath);
+                console.log("\n🔧 JSON Parser Output:");
+                console.log(`Type: ${result.type}`);
+                console.log(`Data:`, result.data);
+                (0, vitest_1.expect)(result.type).toBe("json");
+                (0, vitest_1.expect)(result.data).toEqual(testJson);
+            }
+            finally {
+                (0, fs_1.unlinkSync)(jsonPath);
+            }
+        });
+    });
+    (0, vitest_1.describe)("🎬 Media Module", () => {
+        vitest_1.it.skip("should search for movies/tv/anime", async () => {
+            // Skipped: External scraping, avoids hitting rate limits/blocks in automated tests
+            const results = await (0, media_1.searchMedia)("Breaking Bad", { type: "tv", limit: 1 });
+            console.log("\n🎬 Media Search Results:");
+            if (results.length > 0) {
+                const show = results[0];
+                console.log(`Title: ${show.title}`);
+                console.log(`URL: ${show.url}`);
+                console.log(`Rating: ${show.rating}`);
+                console.log(`Description: ${show.description?.slice(0, 100)}...`);
+            }
+            (0, vitest_1.expect)(results.length).toBeGreaterThan(0);
+            (0, vitest_1.expect)(results[0].title).toContain("Breaking Bad");
+            (0, vitest_1.expect)(results[0].mediaType).toBe("tv");
+        }, 30000);
+    });
+    (0, vitest_1.describe)("🎨 Output Format Examples", () => {
+        (0, vitest_1.it)("should demonstrate WebpageContent structure", async () => {
+            const content = await (0, scrape_1.getWebpageContent)("https://example.com");
+            console.log("\n📋 Complete WebpageContent Structure:");
+            console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
+            console.log(JSON.stringify({
+                title: content.title,
+                siteName: content.siteName,
+                favicon: content.favicon,
+                excerpt: content.excerpt?.slice(0, 100),
+                textContentLength: content.textContent.length,
+                markdownLength: content.markdown?.length,
+                imageCount: content.imageUrls?.length,
+                sampleImage: content.imageUrls?.[0],
+            }, null, 2));
+            console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
+            (0, vitest_1.expect)(content).toBeDefined();
+        }, 30000);
+    });
+});

package/dist/modules/answerbox.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/modules/answerbox.test.js ADDED Viewed

@@ -0,0 +1,105 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const vitest_1 = require("vitest");
+const jsdom_1 = require("jsdom");
+const google_1 = require("./scrapers/google");
+const duckduckgo_1 = require("./scrapers/duckduckgo");
+(0, vitest_1.describe)("Answer Box Extraction", () => {
+    (0, vitest_1.describe)("Google Answer Box", () => {
+        (0, vitest_1.it)("should extract featured snippet text (.hgKElc)", () => {
+            const html = `
+        <div class="hgKElc">
+          This is a featured snippet text.
+        </div>
+      `;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, google_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBe("This is a featured snippet text.");
+        });
+        (0, vitest_1.it)("should extract list snippet (.LGOjhe)", () => {
+            const html = `
+        <div class="LGOjhe">
+          List item 1
+          List item 2
+        </div>
+      `;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, google_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBe("List item 1\n          List item 2");
+        });
+        (0, vitest_1.it)("should extract direct answer (.Z0LcW)", () => {
+            const html = `
+        <div class="Z0LcW">
+          42
+        </div>
+      `;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, google_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBe("42");
+        });
+        (0, vitest_1.it)("should extract knowledge panel description (.kno-rdesc span)", () => {
+            const html = `
+        <div class="kno-rdesc">
+          <span>A description of an entity.</span>
+        </div>
+      `;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, google_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBe("A description of an entity.");
+        });
+        (0, vitest_1.it)("should return undefined if no answer box found", () => {
+            const html = `<div>Just a regular search result page</div>`;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, google_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBeUndefined();
+        });
+    });
+    (0, vitest_1.describe)("DuckDuckGo Answer Box", () => {
+        (0, vitest_1.it)("should extract abstract (.module__text)", () => {
+            const html = `
+        <div class="module__text">
+          Abstract content from Wikipedia usually.
+        </div>
+      `;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBe("Abstract content from Wikipedia usually.");
+        });
+        (0, vitest_1.it)("should extract definition (.zci__def__text)", () => {
+            const html = `
+        <div class="zci__def__text">
+          Definition of a word.
+        </div>
+      `;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBe("Definition of a word.");
+        });
+        (0, vitest_1.it)("should extract calculator (.c-base__title)", () => {
+            const html = `
+        <div class="c-base__title">
+          1 + 1 = 2
+        </div>
+      `;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBe("1 + 1 = 2");
+        });
+        (0, vitest_1.it)("should extract generic fact (.zci__body)", () => {
+            const html = `
+        <div class="zci__body">
+          A generic fact about something.
+        </div>
+      `;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBe("A generic fact about something.");
+        });
+        (0, vitest_1.it)("should return undefined if no answer box found", () => {
+            const html = `<div>Just a regular search result page</div>`;
+            const dom = new jsdom_1.JSDOM(html);
+            const result = (0, duckduckgo_1.extractAnswerBox)(dom.window.document);
+            (0, vitest_1.expect)(result).toBeUndefined();
+        });
+    });
+});