anybrowse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -0
- package/dist/index.d.ts +64 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +102 -0
- package/dist/index.js.map +1 -0
- package/package.json +31 -0
- package/src/index.ts +157 -0
- package/tsconfig.json +17 -0
package/README.md
ADDED
@@ -0,0 +1,136 @@
# anybrowse

**Turn any URL into LLM-ready Markdown.** JavaScript/TypeScript SDK for [anybrowse.dev](https://anybrowse.dev).

Handles JavaScript, dynamic pages, paywalls, and anti-bot measures — so your agents don't have to.

---

## Install

```bash
npm install anybrowse
```

```bash
yarn add anybrowse
```

---

## Quick Start

```typescript
import { AnybrowseClient } from 'anybrowse';

const client = new AnybrowseClient();
const result = await client.scrape('https://techcrunch.com');
console.log(result.markdown);
```

Three lines. Done.

---

## Usage

### Convenience functions (simplest)

```typescript
import { scrape, crawl, search } from "anybrowse";

// Scrape a URL → clean Markdown
const page = await scrape("https://news.ycombinator.com");
console.log(page.markdown);

// With API key + context
const priced = await scrape("https://stripe.com/pricing", {
  apiKey: "your-api-key",
  context: "Extract all pricing tiers and their features",
});

// Crawl a site (multiple pages)
const site = await crawl("https://docs.example.com", { limit: 20 });

// Web search → Markdown results
const results = await search("best TypeScript web scraping 2024", { count: 5 });
```

### Client (reusable)

```typescript
import { AnybrowseClient } from "anybrowse";

const client = new AnybrowseClient("your-api-key");

const page = await client.scrape("https://example.com");
const site = await client.crawl("https://docs.example.com", { limit: 10 });
const results = await client.search("AI agent frameworks 2024", { count: 5 });
```

---

## With LangChain.js

```typescript
import { AnybrowseClient } from "anybrowse";
import { DynamicTool } from "@langchain/core/tools";

const client = new AnybrowseClient(process.env.ANYBROWSE_API_KEY);

const scrapeTool = new DynamicTool({
  name: "anybrowse_scrape",
  description: "Convert any URL to clean, LLM-ready Markdown.",
  func: async (url: string) => {
    const result = await client.scrape(url);
    return result.markdown ?? "";
  },
});

const searchTool = new DynamicTool({
  name: "anybrowse_search",
  description: "Search the web and return readable Markdown results.",
  func: async (query: string) => {
    const result = await client.search(query);
    return JSON.stringify(result);
  },
});
```
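
The two tools above are ordinary `DynamicTool` instances, so they can be exercised on their own before being handed to an agent. A minimal smoke test, assuming a recent `@langchain/core` where tools are Runnables and expose `invoke` (the URL and query are placeholders):

```typescript
// Continues from the snippet above; scrapeTool and searchTool are already defined.
const md = await scrapeTool.invoke("https://example.com");
console.log(md); // Markdown for the page, or "" if none came back

const hits = await searchTool.invoke("anybrowse markdown scraping");
console.log(hits); // JSON-stringified search response
```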

---

## MCP (Model Context Protocol)

Use anybrowse directly in Claude Desktop:

```json
{
  "mcpServers": {
    "anybrowse": {
      "command": "npx",
      "args": ["-y", "anybrowse-mcp"],
      "env": {
        "ANYBROWSE_API_KEY": "your-api-key"
      }
    }
  }
}
```

→ [Full MCP setup guide](https://anybrowse.dev/docs/mcp)

---

## Free Tier

No API key needed to start. Just call the functions.

Need higher limits? → [anybrowse.dev/pricing](https://anybrowse.dev/pricing)
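
When the free quota runs out, the SDK rejects with an error (the client throws on an HTTP 402 response), so a plain try/catch is enough to detect it. A minimal sketch:

```typescript
import { scrape } from "anybrowse";

try {
  const page = await scrape("https://example.com");
  console.log(page.markdown);
} catch (err) {
  // On 402 the message is "Free tier limit reached. Get Pro at anybrowse.dev/pricing".
  console.error(err instanceof Error ? err.message : err);
}
```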

---

## Links

- 🌐 [anybrowse.dev](https://anybrowse.dev)
- 📖 [Docs](https://anybrowse.dev/docs)
- 💬 [GitHub](https://github.com/kc23go/anybrowse)

package/dist/index.d.ts
ADDED
@@ -0,0 +1,64 @@
export interface ScrapeOptions {
    context?: string;
}
export interface CrawlOptions {
    limit?: number;
    context?: string;
}
export interface SearchOptions {
    count?: number;
    context?: string;
}
export interface ScrapeResult {
    markdown?: string;
    content?: string;
    url?: string;
    [key: string]: unknown;
}
export interface CrawlResult {
    pages?: ScrapeResult[];
    [key: string]: unknown;
}
export interface SearchResult {
    markdown?: string;
    results?: unknown[];
    [key: string]: unknown;
}
export declare class AnybrowseClient {
    private apiKey?;
    private baseUrl;
    private headers;
    constructor(apiKey?: string, baseUrl?: string);
    /**
     * Convert any URL to clean, LLM-ready Markdown.
     */
    scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
    /**
     * Crawl a site and return multiple pages as Markdown.
     */
    crawl(url: string, options?: CrawlOptions): Promise<CrawlResult>;
    /**
     * Search the web and return results as Markdown.
     */
    search(query: string, options?: SearchOptions): Promise<SearchResult>;
}
/**
 * Convert any URL to clean, LLM-ready Markdown.
 */
export declare function scrape(url: string, options?: ScrapeOptions & {
    apiKey?: string;
}): Promise<ScrapeResult>;
/**
 * Crawl a site and return multiple pages as Markdown.
 */
export declare function crawl(url: string, options?: CrawlOptions & {
    apiKey?: string;
}): Promise<CrawlResult>;
/**
 * Search the web and return results as Markdown.
 */
export declare function search(query: string, options?: SearchOptions & {
    apiKey?: string;
}): Promise<SearchResult>;
export default AnybrowseClient;
//# sourceMappingURL=index.d.ts.map

package/dist/index.d.ts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,aAAa;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,EAAE,YAAY,EAAE,CAAC;IACvB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,OAAO,EAAE,CAAC;IACpB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAC,CAAS;IACxB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAyB;gBAE5B,MAAM,CAAC,EAAE,MAAM,EAAE,OAAO,GAAE,MAAsB;IAS5D;;OAEG;IACG,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;IAuB7E;;OAEG;IACG,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,WAAW,CAAC;IAoB1E;;OAEG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;CAmBhF;AAID;;GAEG;AACH,wBAAsB,MAAM,CAC1B,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,aAAa,GAAG;IAAE,MAAM,CAAC,EAAE,MAAM,CAAA;CAAO,GAChD,OAAO,CAAC,YAAY,CAAC,CAGvB;AAED;;GAEG;AACH,wBAAsB,KAAK,CACzB,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAY,GAAG;IAAE,MAAM,CAAC,EAAE,MAAM,CAAA;CAAO,GAC/C,OAAO,CAAC,WAAW,CAAC,CAGtB;AAED;;GAEG;AACH,wBAAsB,MAAM,CAC1B,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,aAAa,GAAG;IAAE,MAAM,CAAC,EAAE,MAAM,CAAA;CAAO,GAChD,OAAO,CAAC,YAAY,CAAC,CAGvB;AAED,eAAe,eAAe,CAAC"}
package/dist/index.js
ADDED
@@ -0,0 +1,102 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AnybrowseClient = void 0;
exports.scrape = scrape;
exports.crawl = crawl;
exports.search = search;
const ANYBROWSE_API = "https://anybrowse.dev";
class AnybrowseClient {
    constructor(apiKey, baseUrl = ANYBROWSE_API) {
        this.apiKey = apiKey;
        this.baseUrl = baseUrl;
        this.headers = {
            "Content-Type": "application/json",
            ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
        };
    }
    /**
     * Convert any URL to clean, LLM-ready Markdown.
     */
    async scrape(url, options = {}) {
        const payload = { url };
        if (options.context)
            payload.context = options.context;
        const resp = await fetch(`${this.baseUrl}/scrape`, {
            method: "POST",
            headers: this.headers,
            body: JSON.stringify(payload),
        });
        if (resp.status === 402) {
            throw new Error("Free tier limit reached. Get Pro at anybrowse.dev/pricing");
        }
        if (!resp.ok) {
            throw new Error(`anybrowse scrape failed: ${resp.status} ${resp.statusText}`);
        }
        return resp.json();
    }
    /**
     * Crawl a site and return multiple pages as Markdown.
     */
    async crawl(url, options = {}) {
        const payload = {
            url,
            limit: options.limit ?? 10,
        };
        if (options.context)
            payload.context = options.context;
        const resp = await fetch(`${this.baseUrl}/crawl`, {
            method: "POST",
            headers: this.headers,
            body: JSON.stringify(payload),
        });
        if (!resp.ok) {
            throw new Error(`anybrowse crawl failed: ${resp.status} ${resp.statusText}`);
        }
        return resp.json();
    }
    /**
     * Search the web and return results as Markdown.
     */
    async search(query, options = {}) {
        const payload = {
            q: query,
            count: options.count ?? 5,
        };
        if (options.context)
            payload.context = options.context;
        const resp = await fetch(`${this.baseUrl}/serp/search`, {
            method: "POST",
            headers: this.headers,
            body: JSON.stringify(payload),
        });
        if (!resp.ok) {
            throw new Error(`anybrowse search failed: ${resp.status} ${resp.statusText}`);
        }
        return resp.json();
    }
}
exports.AnybrowseClient = AnybrowseClient;
// Convenience functions (no client instantiation needed)
/**
 * Convert any URL to clean, LLM-ready Markdown.
 */
async function scrape(url, options = {}) {
    const { apiKey, ...rest } = options;
    return new AnybrowseClient(apiKey).scrape(url, rest);
}
/**
 * Crawl a site and return multiple pages as Markdown.
 */
async function crawl(url, options = {}) {
    const { apiKey, ...rest } = options;
    return new AnybrowseClient(apiKey).crawl(url, rest);
}
/**
 * Search the web and return results as Markdown.
 */
async function search(query, options = {}) {
    const { apiKey, ...rest } = options;
    return new AnybrowseClient(apiKey).search(query, rest);
}
exports.default = AnybrowseClient;
//# sourceMappingURL=index.js.map

package/dist/index.js.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AA8HA,wBAMC;AAKD,sBAMC;AAKD,wBAMC;AA1JD,MAAM,aAAa,GAAG,uBAAuB,CAAC;AAkC9C,MAAa,eAAe;IAK1B,YAAY,MAAe,EAAE,UAAkB,aAAa;QAC1D,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,OAAO,GAAG;YACb,cAAc,EAAE,kBAAkB;YAClC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,UAAU,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzD,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,MAAM,OAAO,GAA4B,EAAE,GAAG,EAAE,CAAC;QACjD,IAAI,OAAO,CAAC,OAAO;YAAE,OAAO,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;QAEvD,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,SAAS,EAAE;YACjD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;SAC9B,CAAC,CAAC;QAEH,IAAI,IAAI,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YACxB,MAAM,IAAI,KAAK,CACb,2DAA2D,CAC5D,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAChF,CAAC;QAED,OAAO,IAAI,CAAC,IAAI,EAA2B,CAAC;IAC9C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,UAAwB,EAAE;QACjD,MAAM,OAAO,GAA4B;YACvC,GAAG;YACH,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;SAC3B,CAAC;QACF,IAAI,OAAO,CAAC,OAAO;YAAE,OAAO,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;QAEvD,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,QAAQ,EAAE;YAChD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;SAC9B,CAAC,CAAC;QAEH,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,2BAA2B,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAC/E,CAAC;QAED,OAAO,IAAI,CAAC,IAAI,EAA0B,CAAC;IAC7C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,MAAM,OAAO,GAA4B;YACvC,CAAC,EAAE,KAAK;YACR,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,CAAC;SAC1B,CAAC;QACF,IAAI,OAAO,CAAC,OAAO;YAAE,OAAO,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;QAEvD,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,cAAc,EAAE;YACtD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;SAC9B,CAAC,CAAC;QAEH,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAChF,CAAC;QAED,OAAO,IAAI,CAAC,IAAI,EAA2B,CAAC;IAC9C,CAAC;CACF;AArFD,0CAqFC;AAED,yDAAyD;AAEzD;;GAEG;AACI,KAAK,UAAU,MAAM,CAC1B,GAAW,EACX,UAA+C,EAAE;IAEjD,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;IACpC,OAAO,IAAI,eAAe,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;AACvD,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,KAAK,CACzB,GAAW,EACX,UAA8C,EAAE;IAEhD,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;IACpC,OAAO,IAAI,eAAe,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;AACtD,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,MAAM,CAC1B,KAAa,EACb,UAA+C,EAAE;IAEjD,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;IACpC,OAAO,IAAI,eAAe,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,kBAAe,eAAe,CAAC"}
package/package.json
ADDED
@@ -0,0 +1,31 @@
{
  "name": "anybrowse",
  "version": "1.0.0",
  "description": "Turn any URL into LLM-ready Markdown. JS/TS SDK for anybrowse.dev.",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "scripts": {
    "build": "tsc",
    "dev": "tsc --watch"
  },
  "keywords": [
    "web-scraping",
    "llm",
    "markdown",
    "ai",
    "browser",
    "scraping",
    "langchain"
  ],
  "author": "anybrowse.dev",
  "license": "MIT",
  "dependencies": {},
  "devDependencies": {
    "typescript": "^5.0.0"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/kc23go/anybrowse.git"
  },
  "homepage": "https://anybrowse.dev"
}

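Since `main` points at `dist/index.js` and `types` at `dist/index.d.ts`, consumers get a CommonJS build with bundled declarations, and the default export mirrors the named `AnybrowseClient`. A small interop sketch, assuming the consumer compiles with `esModuleInterop` (or uses a bundler that applies the same interop):

```typescript
import AnybrowseClient from "anybrowse";
import { AnybrowseClient as NamedClient } from "anybrowse";

// Both bindings resolve to the same class: dist/index.js assigns it to
// exports.AnybrowseClient and to exports.default.
console.log(AnybrowseClient === NamedClient); // true
```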
package/src/index.ts
ADDED
@@ -0,0 +1,157 @@
const ANYBROWSE_API = "https://anybrowse.dev";

export interface ScrapeOptions {
  context?: string;
}

export interface CrawlOptions {
  limit?: number;
  context?: string;
}

export interface SearchOptions {
  count?: number;
  context?: string;
}

export interface ScrapeResult {
  markdown?: string;
  content?: string;
  url?: string;
  [key: string]: unknown;
}

export interface CrawlResult {
  pages?: ScrapeResult[];
  [key: string]: unknown;
}

export interface SearchResult {
  markdown?: string;
  results?: unknown[];
  [key: string]: unknown;
}

export class AnybrowseClient {
  private apiKey?: string;
  private baseUrl: string;
  private headers: Record<string, string>;

  constructor(apiKey?: string, baseUrl: string = ANYBROWSE_API) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.headers = {
      "Content-Type": "application/json",
      ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
    };
  }

  /**
   * Convert any URL to clean, LLM-ready Markdown.
   */
  async scrape(url: string, options: ScrapeOptions = {}): Promise<ScrapeResult> {
    const payload: Record<string, unknown> = { url };
    if (options.context) payload.context = options.context;

    const resp = await fetch(`${this.baseUrl}/scrape`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(payload),
    });

    if (resp.status === 402) {
      throw new Error(
        "Free tier limit reached. Get Pro at anybrowse.dev/pricing"
      );
    }

    if (!resp.ok) {
      throw new Error(`anybrowse scrape failed: ${resp.status} ${resp.statusText}`);
    }

    return resp.json() as Promise<ScrapeResult>;
  }

  /**
   * Crawl a site and return multiple pages as Markdown.
   */
  async crawl(url: string, options: CrawlOptions = {}): Promise<CrawlResult> {
    const payload: Record<string, unknown> = {
      url,
      limit: options.limit ?? 10,
    };
    if (options.context) payload.context = options.context;

    const resp = await fetch(`${this.baseUrl}/crawl`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(payload),
    });

    if (!resp.ok) {
      throw new Error(`anybrowse crawl failed: ${resp.status} ${resp.statusText}`);
    }

    return resp.json() as Promise<CrawlResult>;
  }

  /**
   * Search the web and return results as Markdown.
   */
  async search(query: string, options: SearchOptions = {}): Promise<SearchResult> {
    const payload: Record<string, unknown> = {
      q: query,
      count: options.count ?? 5,
    };
    if (options.context) payload.context = options.context;

    const resp = await fetch(`${this.baseUrl}/serp/search`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(payload),
    });

    if (!resp.ok) {
      throw new Error(`anybrowse search failed: ${resp.status} ${resp.statusText}`);
    }

    return resp.json() as Promise<SearchResult>;
  }
}

// Convenience functions (no client instantiation needed)

/**
 * Convert any URL to clean, LLM-ready Markdown.
 */
export async function scrape(
  url: string,
  options: ScrapeOptions & { apiKey?: string } = {}
): Promise<ScrapeResult> {
  const { apiKey, ...rest } = options;
  return new AnybrowseClient(apiKey).scrape(url, rest);
}

/**
 * Crawl a site and return multiple pages as Markdown.
 */
export async function crawl(
  url: string,
  options: CrawlOptions & { apiKey?: string } = {}
): Promise<CrawlResult> {
  const { apiKey, ...rest } = options;
  return new AnybrowseClient(apiKey).crawl(url, rest);
}

/**
 * Search the web and return results as Markdown.
 */
export async function search(
  query: string,
  options: SearchOptions & { apiKey?: string } = {}
): Promise<SearchResult> {
  const { apiKey, ...rest } = options;
  return new AnybrowseClient(apiKey).search(query, rest);
}

export default AnybrowseClient;

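Note that the constructor's second argument overrides the API origin (the default is `https://anybrowse.dev`), which can be handy for routing requests through a proxy or a self-hosted gateway. A sketch with a hypothetical host:

```typescript
import { AnybrowseClient } from "anybrowse";

// "https://anybrowse.internal.example" is a made-up origin for illustration.
const client = new AnybrowseClient(
  process.env.ANYBROWSE_API_KEY,
  "https://anybrowse.internal.example"
);
const page = await client.scrape("https://example.com");
console.log(page.url, (page.markdown ?? "").length);
```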
package/tsconfig.json
ADDED
@@ -0,0 +1,17 @@
{
  "compilerOptions": {
    "target": "ES2020",
    "module": "commonjs",
    "lib": ["ES2020"],
    "outDir": "./dist",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "skipLibCheck": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
}