llm-search-tools 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +244 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/dist/integration.test.d.ts +1 -0
- package/dist/integration.test.js +237 -0
- package/dist/modules/answerbox.test.d.ts +1 -0
- package/dist/modules/answerbox.test.js +105 -0
- package/dist/modules/autocomplete.d.ts +11 -0
- package/dist/modules/autocomplete.js +159 -0
- package/dist/modules/autocomplete.test.d.ts +1 -0
- package/dist/modules/autocomplete.test.js +188 -0
- package/dist/modules/common.d.ts +26 -0
- package/dist/modules/common.js +263 -0
- package/dist/modules/common.test.d.ts +1 -0
- package/dist/modules/common.test.js +87 -0
- package/dist/modules/crawl.d.ts +9 -0
- package/dist/modules/crawl.js +117 -0
- package/dist/modules/crawl.test.d.ts +1 -0
- package/dist/modules/crawl.test.js +48 -0
- package/dist/modules/events.d.ts +8 -0
- package/dist/modules/events.js +129 -0
- package/dist/modules/events.test.d.ts +1 -0
- package/dist/modules/events.test.js +104 -0
- package/dist/modules/finance.d.ts +10 -0
- package/dist/modules/finance.js +20 -0
- package/dist/modules/finance.test.d.ts +1 -0
- package/dist/modules/finance.test.js +77 -0
- package/dist/modules/flights.d.ts +8 -0
- package/dist/modules/flights.js +135 -0
- package/dist/modules/flights.test.d.ts +1 -0
- package/dist/modules/flights.test.js +128 -0
- package/dist/modules/hackernews.d.ts +8 -0
- package/dist/modules/hackernews.js +87 -0
- package/dist/modules/hackernews.js.map +1 -0
- package/dist/modules/images.test.d.ts +1 -0
- package/dist/modules/images.test.js +145 -0
- package/dist/modules/integrations.test.d.ts +1 -0
- package/dist/modules/integrations.test.js +93 -0
- package/dist/modules/media.d.ts +11 -0
- package/dist/modules/media.js +132 -0
- package/dist/modules/media.test.d.ts +1 -0
- package/dist/modules/media.test.js +186 -0
- package/dist/modules/news.d.ts +3 -0
- package/dist/modules/news.js +39 -0
- package/dist/modules/news.test.d.ts +1 -0
- package/dist/modules/news.test.js +88 -0
- package/dist/modules/parser.d.ts +19 -0
- package/dist/modules/parser.js +361 -0
- package/dist/modules/parser.test.d.ts +1 -0
- package/dist/modules/parser.test.js +151 -0
- package/dist/modules/reddit.d.ts +21 -0
- package/dist/modules/reddit.js +107 -0
- package/dist/modules/scrape.d.ts +16 -0
- package/dist/modules/scrape.js +272 -0
- package/dist/modules/scrape.test.d.ts +1 -0
- package/dist/modules/scrape.test.js +232 -0
- package/dist/modules/scraper.d.ts +12 -0
- package/dist/modules/scraper.js +640 -0
- package/dist/modules/scrapers/anidb.d.ts +8 -0
- package/dist/modules/scrapers/anidb.js +156 -0
- package/dist/modules/scrapers/duckduckgo.d.ts +6 -0
- package/dist/modules/scrapers/duckduckgo.js +284 -0
- package/dist/modules/scrapers/google-news.d.ts +2 -0
- package/dist/modules/scrapers/google-news.js +60 -0
- package/dist/modules/scrapers/google.d.ts +6 -0
- package/dist/modules/scrapers/google.js +211 -0
- package/dist/modules/scrapers/searxng.d.ts +2 -0
- package/dist/modules/scrapers/searxng.js +93 -0
- package/dist/modules/scrapers/thetvdb.d.ts +3 -0
- package/dist/modules/scrapers/thetvdb.js +147 -0
- package/dist/modules/scrapers/tmdb.d.ts +3 -0
- package/dist/modules/scrapers/tmdb.js +172 -0
- package/dist/modules/scrapers/yahoo-finance.d.ts +2 -0
- package/dist/modules/scrapers/yahoo-finance.js +33 -0
- package/dist/modules/search.d.ts +5 -0
- package/dist/modules/search.js +45 -0
- package/dist/modules/search.js.map +1 -0
- package/dist/modules/search.test.d.ts +1 -0
- package/dist/modules/search.test.js +219 -0
- package/dist/modules/urbandictionary.d.ts +12 -0
- package/dist/modules/urbandictionary.js +26 -0
- package/dist/modules/webpage.d.ts +4 -0
- package/dist/modules/webpage.js +150 -0
- package/dist/modules/webpage.js.map +1 -0
- package/dist/modules/wikipedia.d.ts +5 -0
- package/dist/modules/wikipedia.js +85 -0
- package/dist/modules/wikipedia.js.map +1 -0
- package/dist/scripts/interactive-search.d.ts +1 -0
- package/dist/scripts/interactive-search.js +98 -0
- package/dist/test.d.ts +1 -0
- package/dist/test.js +179 -0
- package/dist/test.js.map +1 -0
- package/dist/testBraveSearch.d.ts +1 -0
- package/dist/testBraveSearch.js +34 -0
- package/dist/testDuckDuckGo.d.ts +1 -0
- package/dist/testDuckDuckGo.js +52 -0
- package/dist/testEcosia.d.ts +1 -0
- package/dist/testEcosia.js +57 -0
- package/dist/testSearchModule.d.ts +1 -0
- package/dist/testSearchModule.js +95 -0
- package/dist/testwebpage.d.ts +1 -0
- package/dist/testwebpage.js +81 -0
- package/dist/types.d.ts +174 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/createTestDocx.d.ts +1 -0
- package/dist/utils/createTestDocx.js +58 -0
- package/dist/utils/htmlcleaner.d.ts +20 -0
- package/dist/utils/htmlcleaner.js +172 -0
- package/docs/README.md +275 -0
- package/docs/autocomplete.md +73 -0
- package/docs/crawling.md +88 -0
- package/docs/events.md +58 -0
- package/docs/examples.md +158 -0
- package/docs/finance.md +60 -0
- package/docs/flights.md +71 -0
- package/docs/hackernews.md +121 -0
- package/docs/media.md +87 -0
- package/docs/news.md +75 -0
- package/docs/parser.md +197 -0
- package/docs/scraper.md +347 -0
- package/docs/search.md +106 -0
- package/docs/wikipedia.md +91 -0
- package/package.json +97 -0
package/docs/README.md
ADDED

@@ -0,0 +1,275 @@

# llm-search-tools Documentation

This documentation covers usage for both TypeScript and Node.js environments.

## Table of Contents

- [Installation](#installation)
- [TypeScript Usage](#typescript-usage)
- [Node.js Usage](#nodejs-usage)
- [Module Documentation](#module-documentation)
- [Scraper Module - Bot Detection & Proxies](#scraper-module---bot-detection--proxies)

## Installation

```bash
npm install llm-search-tools
```

## TypeScript Usage

### Basic Search

```typescript
import { search, SearchResult } from "llm-search-tools";

async function searchExample() {
  try {
    const results: SearchResult[] = await search("typescript tutorial");
    console.log(results);
  } catch (error) {
    console.error("Search failed:", error);
  }
}
```

### Wikipedia Search and Content

```typescript
import { wikiSearch, wikiGetContent, WikipediaResult } from "llm-search-tools";

async function wikiExample() {
  try {
    const results: WikipediaResult[] = await wikiSearch("Node.js");
    const content = await wikiGetContent(results[0].title);
    console.log(content);
  } catch (error) {
    console.error("Wiki search failed:", error);
  }
}
```

### HackerNews Integration

```typescript
import {
  getTopStories,
  getNewStories,
  getStoryById,
  HackerNewsResult,
} from "llm-search-tools";

async function hnExample() {
  try {
    const topStories: HackerNewsResult[] = await getTopStories(5);
    const newStories: HackerNewsResult[] = await getNewStories(5);
    const story = await getStoryById(topStories[0].id);
    console.log({ topStories, newStories, story });
  } catch (error) {
    console.error("HN fetch failed:", error);
  }
}
```

### Webpage Content Extraction

```typescript
import { getWebpageContent, WebpageContent } from "llm-search-tools";

async function webpageExample() {
  try {
    const content: WebpageContent = await getWebpageContent(
      "https://example.com",
    );
    console.log({
      title: content.title,
      text: content.textContent,
      excerpt: content.excerpt,
    });
  } catch (error) {
    console.error("Content extraction failed:", error);
  }
}
```

## Node.js Usage

### Basic Search

```javascript
const { search } = require("llm-search-tools");

async function searchExample() {
  try {
    const results = await search("nodejs tutorial");
    console.log(results);
  } catch (error) {
    console.error("Search failed:", error);
  }
}
```

### Wikipedia Search and Content

```javascript
const { wikiSearch, wikiGetContent } = require("llm-search-tools");

async function wikiExample() {
  try {
    const results = await wikiSearch("Node.js");
    const content = await wikiGetContent(results[0].title);
    console.log(content);
  } catch (error) {
    console.error("Wiki search failed:", error);
  }
}
```

### HackerNews Integration

```javascript
const { getTopStories, getNewStories, getStoryById } = require("llm-search-tools");

async function hnExample() {
  try {
    const topStories = await getTopStories(5);
    const newStories = await getNewStories(5);
    const story = await getStoryById(topStories[0].id);
    console.log({ topStories, newStories, story });
  } catch (error) {
    console.error("HN fetch failed:", error);
  }
}
```

### Webpage Content Extraction

```javascript
const { getWebpageContent } = require("llm-search-tools");

async function webpageExample() {
  try {
    const content = await getWebpageContent("https://example.com");
    console.log({
      title: content.title,
      text: content.textContent,
      excerpt: content.excerpt,
    });
  } catch (error) {
    console.error("Content extraction failed:", error);
  }
}
```

## Module Documentation

For detailed documentation of each module, see:

- [Scraper Module](./scraper.md) - Bot detection, proxy support, and advanced scraping
- [Wikipedia Module](./wikipedia.md)
- [HackerNews Module](./hackernews.md)
- [Webpage Module](./webpage.md)

## Scraper Module - Bot Detection & Proxies

The scraper module provides intelligent web scraping with automatic bot detection and proxy support.

### Quick Examples

#### Basic Search with Bot Detection

```typescript
import { search } from "llm-search-tools";

// Automatically handles bot protection
const results = await search("typescript tutorial");
```

#### Using a Proxy

```typescript
// Proxy configuration
const results = await search("nodejs tutorial", {
  proxy: "http://proxy.example.com:8080",
});

// Or with authentication
const authenticatedResults = await search("python tutorial", {
  proxy: "http://user:pass@proxy.example.com:8080",
});
```

#### Force Puppeteer for JavaScript-heavy Sites

```typescript
const results = await search("react tutorial", {
  forcePuppeteer: true,
  limit: 10,
});
```

#### Advanced Configuration

```typescript
import { ProxyConfig, ScraperOptions } from "llm-search-tools";

const proxyConfig: ProxyConfig = {
  type: "socks5",
  host: "proxy.example.com",
  port: 1080,
  auth: {
    username: "user",
    password: "pass",
  },
};

const options: ScraperOptions = {
  limit: 20,
  proxy: proxyConfig,
  antiBot: {
    enabled: true,
    maxRetries: 5,
    retryDelay: 3000,
  },
};

const results = await search("web scraping", options);
```

### Key Features

- **Automatic Bot Detection**: Detects Cloudflare, PerimeterX, Akamai, DataDome, and more
- **Puppeteer Fallback**: Seamlessly switches to browser automation when needed
- **Full Proxy Support**: HTTP, HTTPS, SOCKS4, SOCKS5 with authentication
- **Rate Limiting**: Built-in protection against IP bans
- **Backward Compatible**: Works with existing search API code

For complete documentation, see the [Scraper Module Documentation](./scraper.md).

## Error Handling

All functions throw a `SearchError` with the following structure:

```typescript
interface SearchError {
  message: string; // Human-readable error message
  code: string; // Error code for programmatic handling
  originalError?: unknown; // Original error object if available
}
```

Example error handling:

```typescript
try {
  const results = await search("typescript");
} catch (error) {
  if (error.code === "GOOGLE_SEARCH_ERROR") {
    // Handle Google search error
  } else if (error.code === "DDG_SEARCH_ERROR") {
    // Handle DuckDuckGo error
  }
}
```

The same error handling works in JavaScript, just without the type annotations.
package/docs/autocomplete.md
ADDED

@@ -0,0 +1,73 @@

# Autocomplete Documentation

The autocomplete module provides real-time search suggestions from multiple search engines. It's useful for building "search as you type" interfaces or generating keyword ideas.

## Supported Providers

- **Google**
- **DuckDuckGo** (default)
- **Yahoo**
- **Brave**
- **Yandex**
- **Ecosia**
- **Startpage**
- **Qwant**
- **Swisscows**

## Usage

```typescript
import { getSuggestions } from "llm-search-tools";

// Default (DuckDuckGo)
const results = await getSuggestions("typescript");
console.log(results.suggestions);
// Output: ['typescript tutorial', 'typescript types', ...]

// Specify provider
const googleResults = await getSuggestions("react", "google");

// With options
const braveResults = await getSuggestions("linux", "brave", {
  limit: 5,
  proxy: "http://proxy.example.com:8080",
});
```

## API Reference

### `getSuggestions(query, provider?, options?)`

- **query**: `string` - The partial search term
- **provider**: `string` - One of the supported providers (default: `'duckduckgo'`)
- **options**: `AutocompleteOptions`

### Options

| Option    | Type                    | Description                                                                        |
| --------- | ----------------------- | ---------------------------------------------------------------------------------- |
| `limit`   | `number`                | Maximum number of suggestions to return (default varies by provider, usually ~10) |
| `proxy`   | `string \| ProxyConfig` | Proxy configuration                                                                |
| `timeout` | `number`                | Request timeout in milliseconds                                                    |

## Return Value

Returns a `Promise<AutocompleteResult>`:

```typescript
interface AutocompleteResult {
  query: string; // The original query
  suggestions: string[]; // Array of suggestion strings
  source: string; // The provider used (e.g., 'google')
}
```
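
Because the same signature works for every provider, suggestions can be fetched from several engines in parallel and merged. A minimal illustrative sketch (the provider list and helper name are arbitrary):

```typescript
import { getSuggestions } from "llm-search-tools";

// Query a few providers concurrently and merge the unique suggestions.
async function mergedSuggestions(query: string): Promise<string[]> {
  const settled = await Promise.allSettled(
    ["duckduckgo", "google", "brave"].map((provider) =>
      getSuggestions(query, provider),
    ),
  );
  const merged = new Set<string>();
  for (const result of settled) {
    if (result.status === "fulfilled") {
      result.value.suggestions.forEach((s) => merged.add(s));
    }
  }
  return [...merged];
}
```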

## Error Handling

```typescript
try {
  const results = await getSuggestions("query", "google");
} catch (error) {
  console.error("Autocomplete failed:", error.message);
}
```
package/docs/crawling.md
ADDED

@@ -0,0 +1,88 @@

# Crawling Module Documentation

The crawling module allows you to crawl websites to extract content from multiple pages, following links up to a specified depth. It supports both lightweight HTML parsing (Cheerio) and full browser automation (Puppeteer).

## Features

- **Multi-page Crawling**: Follows links to discover new pages
- **Configurable Depth**: Control how deep the crawler goes
- **Domain Confinement**: Option to stay within the original domain
- **Dual Modes**:
  - `cheerio` (default): Fast, lightweight, static HTML only
  - `puppeteer`: Full browser automation for dynamic JS-heavy sites
- **Content Normalization**: Automatically converts pages to clean text and Markdown
- **Proxy Support**: Route requests through proxies
- **Stealth Mode**: Built-in evasion for bot protection when using Puppeteer

## Usage

```typescript
import { crawl } from "llm-search-tools";

// Basic crawl (depth 2, max 10 pages)
const results = await crawl("https://example.com");

results.forEach((page) => {
  console.log(`URL: ${page.url}`);
  console.log(`Title: ${page.title}`);
  console.log(`Text: ${page.textContent.substring(0, 100)}...`);
});
```

### Advanced Usage

```typescript
const results = await crawl("https://example.com", {
  maxDepth: 3,
  maxPages: 50,
  stayOnDomain: true,
  crawlType: "puppeteer", // Use for dynamic sites
  proxy: "http://user:pass@proxy.com:8080",
});
```

## Options

The `crawl` function accepts a `CrawlOptions` object:

| Option           | Type                       | Default     | Description                                              |
| ---------------- | -------------------------- | ----------- | -------------------------------------------------------- |
| `maxDepth`       | `number`                   | `2`         | How many links deep to follow (0 = only start URL)       |
| `maxPages`       | `number`                   | `10`        | Maximum number of pages to crawl                         |
| `stayOnDomain`   | `boolean`                  | `true`      | If true, only follows links on the same domain           |
| `crawlType`      | `'cheerio' \| 'puppeteer'` | `'cheerio'` | Use `cheerio` for speed, `puppeteer` for dynamic content |
| `forcePuppeteer` | `boolean`                  | `false`     | Alias for setting `crawlType: 'puppeteer'`               |
| `proxy`          | `string \| ProxyConfig`    | `undefined` | Proxy configuration                                      |
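
As the table notes, `maxDepth: 0` limits the crawl to the start URL, which turns `crawl` into a quick single-page fetch. A minimal sketch using only the documented options:

```typescript
import { crawl } from "llm-search-tools";

// Depth 0 means no links are followed; cap pages at 1 for good measure.
const [page] = await crawl("https://example.com", {
  maxDepth: 0,
  maxPages: 1,
});
console.log(page.title, page.excerpt);
```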

## Output Structure

The crawler returns a `Promise<CrawlResult>`, which is an array of `CrawledPage` objects:

```typescript
interface CrawledPage {
  url: string;
  title: string;
  content: string; // HTML content
  textContent: string; // Cleaned text
  markdown?: string; // Markdown version
  excerpt?: string;
  siteName?: string;
  favicon?: string;
  imageUrls?: string[];
  depth: number; // Depth at which this page was found
}
```
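
Because each page records the `depth` at which it was found, results are easy to post-process. A small illustrative sketch that builds a Markdown digest from the documented fields:

```typescript
import { crawl } from "llm-search-tools";

const pages = await crawl("https://example.com", { maxDepth: 2 });

// Keep shallow pages and stitch their Markdown (falling back to text) together.
const digest = pages
  .filter((page) => page.depth <= 1)
  .map((page) => `## ${page.title}\n\n${page.markdown ?? page.textContent}`)
  .join("\n\n");
console.log(digest);
```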

## Error Handling

The crawler is robust and will log errors for individual pages while continuing to crawl others. If the entire crawl fails to start, it throws a `SearchError`.

```typescript
try {
  const results = await crawl("https://invalid-url.com");
} catch (error) {
  if (error.code === "CRAWL_ERROR") {
    console.error("Crawling failed:", error.message);
  }
}
```
package/docs/events.md
ADDED

@@ -0,0 +1,58 @@

# Events Search Documentation

The events module enables searching for real-world events (concerts, festivals, meetups, etc.) using Google Events data. It uses Puppeteer with stealth techniques to extract structured event information.

## Usage

```typescript
import { searchEvents } from "llm-search-tools";

const results = await searchEvents("concerts in Tokyo");

results.events.forEach((event) => {
  console.log(`${event.date}: ${event.title} @ ${event.location}`);
});
```

## API Reference

### `searchEvents(query, options?)`

- **query**: `string` - Search query (e.g., "tech conferences London", "music festivals 2024")
- **options**: `EventSearchOptions`

### Options

| Option    | Type                    | Description                                       |
| --------- | ----------------------- | ------------------------------------------------- |
| `limit`   | `number`                | Maximum number of events to return (default: 10)  |
| `date`    | `string`                | _Experimental_ date filter                        |
| `proxy`   | `string \| ProxyConfig` | Proxy configuration                               |
| `timeout` | `number`                | Navigation timeout in ms (default: 30000)         |

## Output Structure

Returns a `Promise<EventResult>`:

```typescript
interface EventResult {
  events: Event[];
  url: string; // The Google Events URL scraped
  source: string; // 'google-events'
}

interface Event {
  title: string;
  date: string;
  location: string;
  link?: string;
  description?: string;
  image?: string;
}
```
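
Since `link`, `description`, and `image` are optional, rendering code should guard them. A short sketch over the documented shape:

```typescript
import { searchEvents } from "llm-search-tools";

const { events } = await searchEvents("music festivals Berlin", { limit: 5 });

// Print one line per event, appending the link only when it is present.
for (const event of events) {
  const line = `${event.date}: ${event.title} @ ${event.location}`;
  console.log(event.link ? `${line} (${event.link})` : line);
}
```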

## Notes

- This module relies on DOM scraping of Google's "Events" UI (`ibp=htl;events`).
- The structure of the Google Events page may change, which can affect extraction reliability.
- Proxy usage is recommended for frequent scraping to avoid CAPTCHAs (see the sketch below).
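
A minimal sketch of that recommendation, using only the options documented in the table above (the proxy URL is a placeholder):

```typescript
import { searchEvents } from "llm-search-tools";

// Route the scrape through a proxy and widen the navigation timeout.
const results = await searchEvents("tech conferences London", {
  proxy: "http://user:pass@proxy.example.com:8080",
  timeout: 60000,
});
console.log(`Scraped ${results.events.length} events from ${results.url}`);
```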
package/docs/examples.md
ADDED

@@ -0,0 +1,158 @@

# Examples 📚

Here are some examples of how to use llm-search-tools in different scenarios.

## Basic Search Example

```typescript
import { search, getWebpageContent } from 'llm-search-tools';

async function searchAndGetContent() {
  try {
    // Search across engines
    const results = await search('typescript tutorial');
    console.log('Search results:', results);

    // Get webpage content from the first result
    if (results.length > 0) {
      const content = await getWebpageContent(results[0].url);
      console.log('First result content:', content.textContent);
    }
  } catch (err) {
    console.error('Search failed:', err);
  }
}
```

## Wikipedia Research Example

```typescript
import { wikiSearch, wikiGetContent, wikiGetSummary } from 'llm-search-tools';

async function researchTopic() {
  try {
    // Search Wikipedia
    const results = await wikiSearch('artificial intelligence');

    // Get the full article for the first result
    if (results.length > 0) {
      const summary = await wikiGetSummary(results[0].title);
      console.log('Summary:', summary.extract);

      const content = await wikiGetContent(results[0].title);
      console.log('Full content:', content);
    }
  } catch (err) {
    console.error('Wiki search failed:', err);
  }
}
```

## HackerNews Feed Example

```typescript
import { getTopStories, getBestStories, getStoryById } from 'llm-search-tools';

async function getHNFeed() {
  try {
    // Get a mix of top and best stories
    const topStories = await getTopStories(5);
    const bestStories = await getBestStories(5);

    console.log('Top stories:', topStories);
    console.log('Best stories:', bestStories);

    // Get full details of the first story
    if (topStories.length > 0) {
      const firstStory = await getStoryById(topStories[0].id);
      console.log('Full story details:', firstStory);
    }
  } catch (err) {
    console.error('HN fetch failed:', err);
  }
}
```

## Webpage Content Extraction Example

```typescript
import { getWebpageContent, getWebpageText, isUrlAccessible } from 'llm-search-tools';

async function extractContent(url: string) {
  try {
    // Check whether the URL is accessible
    if (await isUrlAccessible(url)) {
      // Get full content with HTML
      const content = await getWebpageContent(url);
      console.log('Title:', content.title);
      console.log('Excerpt:', content.excerpt);
      console.log('Content:', content.content);

      // Get just the text
      const text = await getWebpageText(url);
      console.log('Plain text:', text);
    }
  } catch (err) {
    console.error('Content extraction failed:', err);
  }
}
```

## Multi-Source Research Example

```typescript
import { search, wikiSearch, getWebpageContent, getTopStories } from 'llm-search-tools';

async function researchTopic(query: string) {
  try {
    // Query multiple sources in parallel
    const [searchResults, wikiResults, hnStories] = await Promise.all([
      search(query),
      wikiSearch(query),
      getTopStories(5),
    ]);

    console.log('Web search results:', searchResults);
    console.log('Wikipedia results:', wikiResults);
    console.log('HN top stories:', hnStories);

    // Get content from the first search result
    if (searchResults.length > 0) {
      const content = await getWebpageContent(searchResults[0].url);
      console.log('Main article content:', content.textContent);
    }
  } catch (err) {
    console.error('Research failed:', err);
  }
}
```

## Error Handling Example

```typescript
import { search, SearchError } from 'llm-search-tools';

async function robustSearch(query: string) {
  try {
    const results = await search(query);
    return results;
  } catch (err) {
    if (err.code === 'GOOGLE_SEARCH_ERROR') {
      console.log('Google search failed, using fallback...');
      // Handle the error or try an alternative
    } else if (err.code === 'DDG_SEARCH_ERROR') {
      console.log('DuckDuckGo search failed...');
      // Handle the error
    } else {
      console.error('Unknown error:', err);
    }
    return [];
  }
}
```

These examples demonstrate some common use cases, but there are many more possibilities! Check out the module-specific documentation for more details on available functions and options.
package/docs/finance.md
ADDED

@@ -0,0 +1,60 @@

# Finance Module 📈

The finance module provides real-time stock quotes and financial market data using Yahoo Finance.

## Functions

### getQuote(symbol: string)

Main finance function that retrieves stock quotes. Currently backed by Yahoo Finance.

```typescript
import { getQuote } from "llm-search-tools";

const quote = await getQuote("AAPL");
console.log(`${quote.symbol}: $${quote.regularMarketPrice}`);
```

### getStockQuote(symbol: string)

Direct access to the Yahoo Finance scraper.

```typescript
import { getStockQuote } from "llm-search-tools";

const quote = await getStockQuote("GOOGL");
```

## Result Format

The module returns `FinanceResult` objects:

```typescript
interface FinanceResult {
  symbol: string;
  shortName?: string;
  longName?: string;
  regularMarketPrice?: number;
  regularMarketChange?: number;
  regularMarketChangePercent?: number;
  regularMarketTime?: Date;
  currency?: string;
  exchange?: string;
  marketState?: string;
  source: "yahoo-finance";
}
```
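
Since most fields are optional, display code should guard each one. A minimal formatter over the documented `FinanceResult` shape:

```typescript
import { getQuote } from "llm-search-tools";

// Format a quote defensively: every market field may be undefined.
const quote = await getQuote("AAPL");
const price = quote.regularMarketPrice?.toFixed(2) ?? "n/a";
const change = quote.regularMarketChangePercent?.toFixed(2);
console.log(
  `${quote.symbol} (${quote.shortName ?? "unknown"}): ${price} ${quote.currency ?? ""}` +
    (change !== undefined ? ` (${change}%)` : ""),
);
```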

## Error Handling

If the symbol is not found or the API fails, the function throws a `SearchError` with code `FINANCE_QUOTE_ERROR`.

```typescript
try {
  const quote = await getQuote("INVALID_SYMBOL");
} catch (err) {
  if (err.code === "FINANCE_QUOTE_ERROR") {
    console.error("Failed to fetch quote:", err.message);
  }
}
```