@spider-cloud/spider-client 0.1.85 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -0
- package/dist/client.d.ts +100 -2
- package/dist/client.js +211 -9
- package/dist/config.d.ts +75 -1
- package/dist/config.js +60 -1
- package/dist/index.d.ts +8 -2
- package/dist/index.js +18 -1
- package/package.json +7 -3
package/README.md
CHANGED
|
@@ -93,8 +93,68 @@ app.crawlUrl(url, crawlParams, stream, streamCallback);
|
|
|
93
93
|
- **`links(url, params)`**: Retrieve all links from the specified URL with optional parameters.
|
|
94
94
|
- **`screenshot(url, params)`**: Take a screenshot of the specified URL.
|
|
95
95
|
- **`transform(data, params)`**: Perform a fast HTML transformation to markdown or text.
|
|
96
|
+
- **`unblocker(url, params)`**: Unblock challenging websites with anti-bot bypass. Supports AI extraction with `custom_prompt`.
|
|
96
97
|
- **`getCredits()`**: Retrieve account's remaining credits.
|
|
97
98
|
|
|
99
|
+
### AI Studio Methods
|
|
100
|
+
|
|
101
|
+
AI Studio methods require an active AI Studio subscription.
|
|
102
|
+
|
|
103
|
+
- **`aiCrawl(url, prompt, params)`**: AI-guided crawling using natural language prompts.
|
|
104
|
+
- **`aiScrape(url, prompt, params)`**: AI-guided scraping using natural language prompts.
|
|
105
|
+
- **`aiSearch(prompt, params)`**: AI-enhanced web search using natural language queries.
|
|
106
|
+
- **`aiBrowser(url, prompt, params)`**: AI-guided browser automation using natural language commands.
|
|
107
|
+
- **`aiLinks(url, prompt, params)`**: AI-guided link extraction and filtering.
|
|
108
|
+
|
|
109
|
+
```javascript
|
|
110
|
+
// AI Scrape example
|
|
111
|
+
const result = await app.aiScrape(
|
|
112
|
+
"https://example.com/products",
|
|
113
|
+
"Extract all product names, prices, and descriptions"
|
|
114
|
+
);
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Unblocker with AI Extraction
|
|
118
|
+
|
|
119
|
+
```javascript
|
|
120
|
+
// Unblock and extract data using AI
|
|
121
|
+
const result = await app.unblocker("https://protected-site.com/products", {
|
|
122
|
+
custom_prompt: "Extract all product names and prices as JSON"
|
|
123
|
+
});
|
|
124
|
+
// Extracted data is available in result[0].metadata.extracted_data
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Unblocker with JSON Schema Extraction
|
|
128
|
+
|
|
129
|
+
Use JSON Schema for structured, validated extraction output:
|
|
130
|
+
|
|
131
|
+
```javascript
|
|
132
|
+
const result = await app.unblocker("https://protected-site.com/products", {
|
|
133
|
+
extraction_schema: {
|
|
134
|
+
name: "products",
|
|
135
|
+
description: "Product listing extraction",
|
|
136
|
+
schema: JSON.stringify({
|
|
137
|
+
type: "object",
|
|
138
|
+
properties: {
|
|
139
|
+
products: {
|
|
140
|
+
type: "array",
|
|
141
|
+
items: {
|
|
142
|
+
type: "object",
|
|
143
|
+
properties: {
|
|
144
|
+
name: { type: "string" },
|
|
145
|
+
price: { type: "number" }
|
|
146
|
+
},
|
|
147
|
+
required: ["name", "price"]
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}),
|
|
152
|
+
strict: true
|
|
153
|
+
}
|
|
154
|
+
});
|
|
155
|
+
// Extracted data conforms to the schema in result[0].metadata.extracted_data
|
|
156
|
+
```
|
|
157
|
+
|
|
98
158
|
## Error Handling
|
|
99
159
|
|
|
100
160
|
The SDK provides robust error handling and will throw exceptions when it encounters critical issues. Always use `.catch()` on promises to handle these errors gracefully.
|
package/dist/client.d.ts
CHANGED
|
@@ -1,4 +1,16 @@
|
|
|
1
|
-
import { ChunkCallbackFunction, Collection, SpiderCoreResponse, SpiderParams, SearchRequestParams, RequestParamsTransform } from "./config";
|
|
1
|
+
import { ChunkCallbackFunction, Collection, SpiderCoreResponse, SpiderParams, SearchRequestParams, RequestParamsTransform, AIRequestParams, AIStudioTier } from "./config";
|
|
2
|
+
import { SpiderBrowser, type SpiderBrowserOptions } from "spider-browser";
|
|
3
|
+
/**
|
|
4
|
+
* Rate limit state from API response headers.
|
|
5
|
+
*/
|
|
6
|
+
export interface RateLimitInfo {
|
|
7
|
+
/** Maximum requests allowed per minute. */
|
|
8
|
+
limit: number;
|
|
9
|
+
/** Requests remaining in the current window. */
|
|
10
|
+
remaining: number;
|
|
11
|
+
/** Seconds until the rate limit window resets. */
|
|
12
|
+
resetSeconds: number;
|
|
13
|
+
}
|
|
2
14
|
/**
|
|
3
15
|
* Generic params for core request.
|
|
4
16
|
*/
|
|
@@ -14,12 +26,28 @@ export interface SpiderConfig {
|
|
|
14
26
|
*/
|
|
15
27
|
export declare class Spider {
|
|
16
28
|
private apiKey?;
|
|
29
|
+
private aiRateLimiter;
|
|
30
|
+
private aiStudioTier;
|
|
31
|
+
/** The latest rate limit state from API response headers. */
|
|
32
|
+
rateLimit: RateLimitInfo;
|
|
17
33
|
/**
|
|
18
34
|
* Create an instance of Spider.
|
|
19
35
|
* @param {string | null} apiKey - The API key used to authenticate to the Spider API. If null, attempts to source from environment variables.
|
|
36
|
+
* @param {AIStudioTier} aiStudioTier - The AI Studio subscription tier for rate limiting. Defaults to 'starter'.
|
|
20
37
|
* @throws Will throw an error if the API key is not provided.
|
|
21
38
|
*/
|
|
22
|
-
constructor(props?: SpiderConfig);
|
|
39
|
+
constructor(props?: SpiderConfig & {
|
|
40
|
+
aiStudioTier?: AIStudioTier;
|
|
41
|
+
});
|
|
42
|
+
/**
|
|
43
|
+
* Update the AI Studio subscription tier (adjusts rate limiting).
|
|
44
|
+
* @param {AIStudioTier} tier - The new subscription tier.
|
|
45
|
+
*/
|
|
46
|
+
setAIStudioTier(tier: AIStudioTier): void;
|
|
47
|
+
/**
|
|
48
|
+
* Update rate limit state from response headers.
|
|
49
|
+
*/
|
|
50
|
+
private _updateRateLimit;
|
|
23
51
|
/**
|
|
24
52
|
* Internal method to handle POST requests.
|
|
25
53
|
* @param {string} endpoint - The API endpoint to which the POST request should be sent.
|
|
@@ -120,6 +148,76 @@ export declare class Spider {
|
|
|
120
148
|
Authorization: string;
|
|
121
149
|
"User-Agent": string;
|
|
122
150
|
};
|
|
151
|
+
/**
|
|
152
|
+
* Internal method to handle AI Studio POST requests with rate limiting.
|
|
153
|
+
* @param {string} endpoint - The AI Studio endpoint.
|
|
154
|
+
* @param {Record<string, any>} data - The request data including prompt.
|
|
155
|
+
* @returns {Promise<any>} The response data.
|
|
156
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
157
|
+
* @throws {AIStudioRateLimitExceeded} When rate limit is exceeded server-side.
|
|
158
|
+
*/
|
|
159
|
+
private _aiApiPost;
|
|
160
|
+
/**
|
|
161
|
+
* AI-guided crawling using natural language prompts.
|
|
162
|
+
* Requires an active AI Studio subscription.
|
|
163
|
+
* @param {string} url - The URL to start crawling.
|
|
164
|
+
* @param {string} prompt - Natural language instruction for what to crawl and extract.
|
|
165
|
+
* @param {AIRequestParams} [params={}] - Additional parameters for the crawl.
|
|
166
|
+
* @returns {Promise<any>} The crawl results guided by the AI prompt.
|
|
167
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
168
|
+
*/
|
|
169
|
+
aiCrawl(url: string, prompt: string, params?: Omit<AIRequestParams, "prompt">): Promise<any>;
|
|
170
|
+
/**
|
|
171
|
+
* AI-guided scraping using natural language prompts.
|
|
172
|
+
* Requires an active AI Studio subscription.
|
|
173
|
+
* @param {string} url - The URL to scrape.
|
|
174
|
+
* @param {string} prompt - Natural language description of data to extract.
|
|
175
|
+
* @param {AIRequestParams} [params={}] - Additional parameters for the scrape.
|
|
176
|
+
* @returns {Promise<any>} The scraped data guided by the AI prompt.
|
|
177
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
178
|
+
*/
|
|
179
|
+
aiScrape(url: string, prompt: string, params?: Omit<AIRequestParams, "prompt">): Promise<any>;
|
|
180
|
+
/**
|
|
181
|
+
* AI-enhanced web search using natural language queries.
|
|
182
|
+
* Requires an active AI Studio subscription.
|
|
183
|
+
* @param {string} prompt - Natural language search query.
|
|
184
|
+
* @param {SearchRequestParams} [params={}] - Additional search parameters.
|
|
185
|
+
* @returns {Promise<any>} The search results with AI-enhanced relevance.
|
|
186
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
187
|
+
*/
|
|
188
|
+
aiSearch(prompt: string, params?: SearchRequestParams): Promise<any>;
|
|
189
|
+
/**
|
|
190
|
+
* AI-guided browser automation using natural language commands.
|
|
191
|
+
* Requires an active AI Studio subscription.
|
|
192
|
+
* @param {string} url - The URL to automate.
|
|
193
|
+
* @param {string} prompt - Natural language description of browser actions.
|
|
194
|
+
* @param {AIRequestParams} [params={}] - Additional parameters for automation.
|
|
195
|
+
* @returns {Promise<any>} The automation results.
|
|
196
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
197
|
+
*/
|
|
198
|
+
aiBrowser(url: string, prompt: string, params?: Omit<AIRequestParams, "prompt">): Promise<any>;
|
|
199
|
+
/**
|
|
200
|
+
* AI-guided link extraction and filtering.
|
|
201
|
+
* Requires an active AI Studio subscription.
|
|
202
|
+
* @param {string} url - The URL to extract links from.
|
|
203
|
+
* @param {string} prompt - Natural language description of what links to find.
|
|
204
|
+
* @param {AIRequestParams} [params={}] - Additional parameters.
|
|
205
|
+
* @returns {Promise<any>} The filtered links based on AI analysis.
|
|
206
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
207
|
+
*/
|
|
208
|
+
aiLinks(url: string, prompt: string, params?: Omit<AIRequestParams, "prompt">): Promise<any>;
|
|
209
|
+
/**
|
|
210
|
+
* Creates a SpiderBrowser instance for WebSocket-based browser automation (CDP/BiDi).
|
|
211
|
+
* @param {Omit<SpiderBrowserOptions, 'apiKey'>} [options] - Browser options (excluding apiKey, which is inherited from the client).
|
|
212
|
+
* @returns {SpiderBrowser} A new SpiderBrowser instance.
|
|
213
|
+
*/
|
|
214
|
+
browser(options?: Omit<SpiderBrowserOptions, "apiKey">): SpiderBrowser;
|
|
215
|
+
/**
|
|
216
|
+
* Generates the API URL for a recording session's video/metadata.
|
|
217
|
+
* @param {string} sessionId - The recording session ID.
|
|
218
|
+
* @returns {string} The full URL to the recording endpoint.
|
|
219
|
+
*/
|
|
220
|
+
static getRecordingVideoUrl(sessionId: string): string;
|
|
123
221
|
/**
|
|
124
222
|
* Handles errors from API requests.
|
|
125
223
|
* @param {Response} response - The fetch response object.
|
package/dist/client.js
CHANGED
|
@@ -2,9 +2,54 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.Spider = void 0;
|
|
4
4
|
const config_1 = require("./config");
|
|
5
|
+
const spider_browser_1 = require("spider-browser");
|
|
5
6
|
const package_json_1 = require("../package.json");
|
|
6
7
|
const stream_reader_1 = require("./utils/stream-reader");
|
|
7
8
|
const exponential_backoff_1 = require("exponential-backoff");
|
|
9
|
+
/**
|
|
10
|
+
* Simple client-side rate limiter for AI Studio endpoints.
|
|
11
|
+
* Uses a sliding window approach to limit requests per second.
|
|
12
|
+
*/
|
|
13
|
+
class RateLimiter {
|
|
14
|
+
constructor(requestsPerSecond) {
|
|
15
|
+
this.timestamps = [];
|
|
16
|
+
this.maxRequests = requestsPerSecond;
|
|
17
|
+
this.windowMs = 1000;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Update the rate limit (e.g., when user tier changes).
|
|
21
|
+
*/
|
|
22
|
+
setLimit(requestsPerSecond) {
|
|
23
|
+
this.maxRequests = requestsPerSecond;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Check if a request can be made. If not, returns the ms to wait.
|
|
27
|
+
* If yes, records the request and returns 0.
|
|
28
|
+
*/
|
|
29
|
+
tryAcquire() {
|
|
30
|
+
const now = Date.now();
|
|
31
|
+
// Remove timestamps outside the window
|
|
32
|
+
this.timestamps = this.timestamps.filter((t) => now - t < this.windowMs);
|
|
33
|
+
if (this.timestamps.length >= this.maxRequests) {
|
|
34
|
+
// Calculate how long to wait
|
|
35
|
+
const oldestInWindow = this.timestamps[0];
|
|
36
|
+
const waitTime = this.windowMs - (now - oldestInWindow);
|
|
37
|
+
return Math.max(1, waitTime);
|
|
38
|
+
}
|
|
39
|
+
this.timestamps.push(now);
|
|
40
|
+
return 0;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Wait until a request can be made, then acquire the slot.
|
|
44
|
+
*/
|
|
45
|
+
async acquire() {
|
|
46
|
+
const waitTime = this.tryAcquire();
|
|
47
|
+
if (waitTime > 0) {
|
|
48
|
+
await new Promise((resolve) => setTimeout(resolve, waitTime));
|
|
49
|
+
return this.acquire();
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
8
53
|
/**
|
|
9
54
|
* A class to interact with the Spider API.
|
|
10
55
|
*/
|
|
@@ -12,15 +57,42 @@ class Spider {
|
|
|
12
57
|
/**
|
|
13
58
|
* Create an instance of Spider.
|
|
14
59
|
* @param {string | null} apiKey - The API key used to authenticate to the Spider API. If null, attempts to source from environment variables.
|
|
60
|
+
* @param {AIStudioTier} aiStudioTier - The AI Studio subscription tier for rate limiting. Defaults to 'starter'.
|
|
15
61
|
* @throws Will throw an error if the API key is not provided.
|
|
16
62
|
*/
|
|
17
63
|
constructor(props) {
|
|
18
64
|
var _a;
|
|
65
|
+
/** The latest rate limit state from API response headers. */
|
|
66
|
+
this.rateLimit = { limit: 0, remaining: 0, resetSeconds: 0 };
|
|
19
67
|
this.apiKey = (props === null || props === void 0 ? void 0 : props.apiKey) || ((_a = process === null || process === void 0 ? void 0 : process.env) === null || _a === void 0 ? void 0 : _a.SPIDER_API_KEY);
|
|
68
|
+
this.aiStudioTier = (props === null || props === void 0 ? void 0 : props.aiStudioTier) || "starter";
|
|
69
|
+
this.aiRateLimiter = new RateLimiter(config_1.AI_STUDIO_RATE_LIMITS[this.aiStudioTier]);
|
|
20
70
|
if (!this.apiKey) {
|
|
21
71
|
throw new Error("No API key provided");
|
|
22
72
|
}
|
|
23
73
|
}
|
|
74
|
+
/**
|
|
75
|
+
* Update the AI Studio subscription tier (adjusts rate limiting).
|
|
76
|
+
* @param {AIStudioTier} tier - The new subscription tier.
|
|
77
|
+
*/
|
|
78
|
+
setAIStudioTier(tier) {
|
|
79
|
+
this.aiStudioTier = tier;
|
|
80
|
+
this.aiRateLimiter.setLimit(config_1.AI_STUDIO_RATE_LIMITS[tier]);
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* Update rate limit state from response headers.
|
|
84
|
+
*/
|
|
85
|
+
_updateRateLimit(headers) {
|
|
86
|
+
const limit = headers.get("RateLimit-Limit");
|
|
87
|
+
const remaining = headers.get("RateLimit-Remaining");
|
|
88
|
+
const reset = headers.get("RateLimit-Reset");
|
|
89
|
+
if (limit)
|
|
90
|
+
this.rateLimit.limit = Number(limit);
|
|
91
|
+
if (remaining)
|
|
92
|
+
this.rateLimit.remaining = Number(remaining);
|
|
93
|
+
if (reset)
|
|
94
|
+
this.rateLimit.resetSeconds = Number(reset);
|
|
95
|
+
}
|
|
24
96
|
/**
|
|
25
97
|
* Internal method to handle POST requests.
|
|
26
98
|
* @param {string} endpoint - The API endpoint to which the POST request should be sent.
|
|
@@ -30,11 +102,20 @@ class Spider {
|
|
|
30
102
|
*/
|
|
31
103
|
async _apiPost(endpoint, data, stream, jsonl) {
|
|
32
104
|
const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
|
|
33
|
-
const response = await (0, exponential_backoff_1.backOff)(() =>
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
105
|
+
const response = await (0, exponential_backoff_1.backOff)(async () => {
|
|
106
|
+
const res = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
|
|
107
|
+
method: "POST",
|
|
108
|
+
headers: headers,
|
|
109
|
+
body: JSON.stringify(data),
|
|
110
|
+
});
|
|
111
|
+
this._updateRateLimit(res.headers);
|
|
112
|
+
if (res.status === 429) {
|
|
113
|
+
const retryAfter = Number(res.headers.get("Retry-After") || "1");
|
|
114
|
+
await new Promise((r) => setTimeout(r, retryAfter * 1000));
|
|
115
|
+
throw new Error(`Rate limited on ${endpoint}. Retrying after ${retryAfter}s.`);
|
|
116
|
+
}
|
|
117
|
+
return res;
|
|
118
|
+
}, {
|
|
38
119
|
numOfAttempts: 5,
|
|
39
120
|
});
|
|
40
121
|
if (!stream) {
|
|
@@ -54,10 +135,19 @@ class Spider {
|
|
|
54
135
|
*/
|
|
55
136
|
async _apiGet(endpoint) {
|
|
56
137
|
const headers = this.prepareHeaders;
|
|
57
|
-
const response = await (0, exponential_backoff_1.backOff)(() =>
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
138
|
+
const response = await (0, exponential_backoff_1.backOff)(async () => {
|
|
139
|
+
const res = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
|
|
140
|
+
method: "GET",
|
|
141
|
+
headers: headers,
|
|
142
|
+
});
|
|
143
|
+
this._updateRateLimit(res.headers);
|
|
144
|
+
if (res.status === 429) {
|
|
145
|
+
const retryAfter = Number(res.headers.get("Retry-After") || "1");
|
|
146
|
+
await new Promise((r) => setTimeout(r, retryAfter * 1000));
|
|
147
|
+
throw new Error(`Rate limited on ${endpoint}. Retrying after ${retryAfter}s.`);
|
|
148
|
+
}
|
|
149
|
+
return res;
|
|
150
|
+
}, {
|
|
61
151
|
numOfAttempts: 5,
|
|
62
152
|
});
|
|
63
153
|
if (response.ok) {
|
|
@@ -187,6 +277,118 @@ class Spider {
|
|
|
187
277
|
"Content-Type": "application/jsonl",
|
|
188
278
|
};
|
|
189
279
|
}
|
|
280
|
+
/**
|
|
281
|
+
* Internal method to handle AI Studio POST requests with rate limiting.
|
|
282
|
+
* @param {string} endpoint - The AI Studio endpoint.
|
|
283
|
+
* @param {Record<string, any>} data - The request data including prompt.
|
|
284
|
+
* @returns {Promise<any>} The response data.
|
|
285
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
286
|
+
* @throws {AIStudioRateLimitExceeded} When rate limit is exceeded server-side.
|
|
287
|
+
*/
|
|
288
|
+
async _aiApiPost(endpoint, data) {
|
|
289
|
+
// Apply client-side rate limiting
|
|
290
|
+
await this.aiRateLimiter.acquire();
|
|
291
|
+
const headers = this.prepareHeaders;
|
|
292
|
+
const response = await (0, exponential_backoff_1.backOff)(() => fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
|
|
293
|
+
method: "POST",
|
|
294
|
+
headers: headers,
|
|
295
|
+
body: JSON.stringify(data),
|
|
296
|
+
}), {
|
|
297
|
+
numOfAttempts: 3,
|
|
298
|
+
retry: (e, attemptNumber) => {
|
|
299
|
+
// Don't retry on subscription or rate limit errors
|
|
300
|
+
return attemptNumber < 3;
|
|
301
|
+
},
|
|
302
|
+
});
|
|
303
|
+
if (response.ok) {
|
|
304
|
+
return response.json();
|
|
305
|
+
}
|
|
306
|
+
// Handle AI Studio specific errors
|
|
307
|
+
if (response.status === 402) {
|
|
308
|
+
throw new config_1.AIStudioSubscriptionRequired();
|
|
309
|
+
}
|
|
310
|
+
if (response.status === 429) {
|
|
311
|
+
const retryAfter = response.headers.get("Retry-After");
|
|
312
|
+
const retryAfterMs = retryAfter ? parseInt(retryAfter) * 1000 : 1000;
|
|
313
|
+
throw new config_1.AIStudioRateLimitExceeded(retryAfterMs);
|
|
314
|
+
}
|
|
315
|
+
this.handleError(response, `AI request to ${endpoint}`);
|
|
316
|
+
}
|
|
317
|
+
/**
|
|
318
|
+
* AI-guided crawling using natural language prompts.
|
|
319
|
+
* Requires an active AI Studio subscription.
|
|
320
|
+
* @param {string} url - The URL to start crawling.
|
|
321
|
+
* @param {string} prompt - Natural language instruction for what to crawl and extract.
|
|
322
|
+
* @param {AIRequestParams} [params={}] - Additional parameters for the crawl.
|
|
323
|
+
* @returns {Promise<any>} The crawl results guided by the AI prompt.
|
|
324
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
325
|
+
*/
|
|
326
|
+
async aiCrawl(url, prompt, params = {}) {
|
|
327
|
+
return this._aiApiPost(config_1.APIRoutes.AICrawl, { url, prompt, ...params });
|
|
328
|
+
}
|
|
329
|
+
/**
|
|
330
|
+
* AI-guided scraping using natural language prompts.
|
|
331
|
+
* Requires an active AI Studio subscription.
|
|
332
|
+
* @param {string} url - The URL to scrape.
|
|
333
|
+
* @param {string} prompt - Natural language description of data to extract.
|
|
334
|
+
* @param {AIRequestParams} [params={}] - Additional parameters for the scrape.
|
|
335
|
+
* @returns {Promise<any>} The scraped data guided by the AI prompt.
|
|
336
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
337
|
+
*/
|
|
338
|
+
async aiScrape(url, prompt, params = {}) {
|
|
339
|
+
return this._aiApiPost(config_1.APIRoutes.AIScrape, { url, prompt, ...params });
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* AI-enhanced web search using natural language queries.
|
|
343
|
+
* Requires an active AI Studio subscription.
|
|
344
|
+
* @param {string} prompt - Natural language search query.
|
|
345
|
+
* @param {SearchRequestParams} [params={}] - Additional search parameters.
|
|
346
|
+
* @returns {Promise<any>} The search results with AI-enhanced relevance.
|
|
347
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
348
|
+
*/
|
|
349
|
+
async aiSearch(prompt, params = {}) {
|
|
350
|
+
return this._aiApiPost(config_1.APIRoutes.AISearch, { prompt, ...params });
|
|
351
|
+
}
|
|
352
|
+
/**
|
|
353
|
+
* AI-guided browser automation using natural language commands.
|
|
354
|
+
* Requires an active AI Studio subscription.
|
|
355
|
+
* @param {string} url - The URL to automate.
|
|
356
|
+
* @param {string} prompt - Natural language description of browser actions.
|
|
357
|
+
* @param {AIRequestParams} [params={}] - Additional parameters for automation.
|
|
358
|
+
* @returns {Promise<any>} The automation results.
|
|
359
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
360
|
+
*/
|
|
361
|
+
async aiBrowser(url, prompt, params = {}) {
|
|
362
|
+
return this._aiApiPost(config_1.APIRoutes.AIBrowser, { url, prompt, ...params });
|
|
363
|
+
}
|
|
364
|
+
/**
|
|
365
|
+
* AI-guided link extraction and filtering.
|
|
366
|
+
* Requires an active AI Studio subscription.
|
|
367
|
+
* @param {string} url - The URL to extract links from.
|
|
368
|
+
* @param {string} prompt - Natural language description of what links to find.
|
|
369
|
+
* @param {AIRequestParams} [params={}] - Additional parameters.
|
|
370
|
+
* @returns {Promise<any>} The filtered links based on AI analysis.
|
|
371
|
+
* @throws {AIStudioSubscriptionRequired} When subscription is not active.
|
|
372
|
+
*/
|
|
373
|
+
async aiLinks(url, prompt, params = {}) {
|
|
374
|
+
return this._aiApiPost(config_1.APIRoutes.AILinks, { url, prompt, ...params });
|
|
375
|
+
}
|
|
376
|
+
/**
|
|
377
|
+
* Creates a SpiderBrowser instance for WebSocket-based browser automation (CDP/BiDi).
|
|
378
|
+
* @param {Omit<SpiderBrowserOptions, 'apiKey'>} [options] - Browser options (excluding apiKey, which is inherited from the client).
|
|
379
|
+
* @returns {SpiderBrowser} A new SpiderBrowser instance.
|
|
380
|
+
*/
|
|
381
|
+
browser(options) {
|
|
382
|
+
return new spider_browser_1.SpiderBrowser({ apiKey: this.apiKey, ...options });
|
|
383
|
+
}
|
|
384
|
+
/**
|
|
385
|
+
* Generates the API URL for a recording session's video/metadata.
|
|
386
|
+
* @param {string} sessionId - The recording session ID.
|
|
387
|
+
* @returns {string} The full URL to the recording endpoint.
|
|
388
|
+
*/
|
|
389
|
+
static getRecordingVideoUrl(sessionId) {
|
|
390
|
+
return `${config_1.APISchema.url}/v1/data/recordings/${sessionId}`;
|
|
391
|
+
}
|
|
190
392
|
/**
|
|
191
393
|
* Handles errors from API requests.
|
|
192
394
|
* @param {Response} response - The fetch response object.
|
package/dist/config.d.ts
CHANGED
|
@@ -598,7 +598,81 @@ export declare enum APIRoutes {
|
|
|
598
598
|
Search = "search",
|
|
599
599
|
Transform = "transform",
|
|
600
600
|
Data = "data",
|
|
601
|
-
DataCredits = "data/credits"
|
|
601
|
+
DataCredits = "data/credits",
|
|
602
|
+
AICrawl = "ai/crawl",
|
|
603
|
+
AIScrape = "ai/scrape",
|
|
604
|
+
AISearch = "ai/search",
|
|
605
|
+
AIBrowser = "ai/browser",
|
|
606
|
+
AILinks = "ai/links"
|
|
607
|
+
}
|
|
608
|
+
/**
|
|
609
|
+
* AI Studio subscription tiers with their rate limits (requests per second).
|
|
610
|
+
*/
|
|
611
|
+
export declare const AI_STUDIO_RATE_LIMITS: {
|
|
612
|
+
readonly starter: 1;
|
|
613
|
+
readonly lite: 5;
|
|
614
|
+
readonly standard: 10;
|
|
615
|
+
readonly custom: 25;
|
|
616
|
+
};
|
|
617
|
+
export type AIStudioTier = keyof typeof AI_STUDIO_RATE_LIMITS;
|
|
618
|
+
/**
|
|
619
|
+
* Parameters for AI Studio endpoints.
|
|
620
|
+
* All AI endpoints require a 'prompt' parameter for natural language instructions.
|
|
621
|
+
*/
|
|
622
|
+
export interface AIRequestParams extends Omit<SpiderParams, "url"> {
|
|
623
|
+
/** Natural language instruction for AI-guided extraction */
|
|
624
|
+
prompt: string;
|
|
625
|
+
}
|
|
626
|
+
/**
|
|
627
|
+
* AI Studio subscription info and URLs.
|
|
628
|
+
*/
|
|
629
|
+
export declare const AIStudioInfo: {
|
|
630
|
+
/** Base URL for AI Studio */
|
|
631
|
+
readonly baseUrl: "https://aistudio.spider.cloud";
|
|
632
|
+
/** URL for pricing/subscription page */
|
|
633
|
+
readonly pricingUrl: "https://aistudio.spider.cloud/pricing";
|
|
634
|
+
/** URL for AI Studio documentation */
|
|
635
|
+
readonly docsUrl: "https://aistudio.spider.cloud/docs";
|
|
636
|
+
/** Available subscription tiers */
|
|
637
|
+
readonly tiers: {
|
|
638
|
+
readonly starter: {
|
|
639
|
+
readonly price: 6;
|
|
640
|
+
readonly credits: 30000;
|
|
641
|
+
readonly rateLimit: 1;
|
|
642
|
+
};
|
|
643
|
+
readonly lite: {
|
|
644
|
+
readonly price: 30;
|
|
645
|
+
readonly credits: 150000;
|
|
646
|
+
readonly rateLimit: 5;
|
|
647
|
+
};
|
|
648
|
+
readonly standard: {
|
|
649
|
+
readonly price: 125;
|
|
650
|
+
readonly credits: 600000;
|
|
651
|
+
readonly rateLimit: 10;
|
|
652
|
+
};
|
|
653
|
+
readonly custom: {
|
|
654
|
+
readonly price: 600;
|
|
655
|
+
readonly credits: 3000000;
|
|
656
|
+
readonly rateLimit: 25;
|
|
657
|
+
};
|
|
658
|
+
};
|
|
659
|
+
};
|
|
660
|
+
/**
|
|
661
|
+
* Error thrown when AI Studio subscription is required but not active.
|
|
662
|
+
*/
|
|
663
|
+
export declare class AIStudioSubscriptionRequired extends Error {
|
|
664
|
+
/** URL to subscribe to AI Studio */
|
|
665
|
+
subscribeUrl: string;
|
|
666
|
+
constructor(message?: string);
|
|
667
|
+
}
|
|
668
|
+
/**
|
|
669
|
+
* Error thrown when AI Studio rate limit is exceeded.
|
|
670
|
+
*/
|
|
671
|
+
export declare class AIStudioRateLimitExceeded extends Error {
|
|
672
|
+
retryAfterMs: number;
|
|
673
|
+
/** URL to upgrade subscription for higher rate limits */
|
|
674
|
+
upgradeUrl: string;
|
|
675
|
+
constructor(retryAfterMs: number, currentTier?: AIStudioTier);
|
|
602
676
|
}
|
|
603
677
|
export declare const APISchema: {
|
|
604
678
|
url: string;
|
package/dist/config.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.setBaseUrl = exports.APISchema = exports.APIRoutes = exports.ApiVersion = exports.Collection = exports.RedirectPolicy = void 0;
|
|
3
|
+
exports.setBaseUrl = exports.APISchema = exports.AIStudioRateLimitExceeded = exports.AIStudioSubscriptionRequired = exports.AIStudioInfo = exports.AI_STUDIO_RATE_LIMITS = exports.APIRoutes = exports.ApiVersion = exports.Collection = exports.RedirectPolicy = void 0;
|
|
4
4
|
// The HTTP redirect policy to use. Loose allows all domains and Strict only allows relative requests to the domain.
|
|
5
5
|
var RedirectPolicy;
|
|
6
6
|
(function (RedirectPolicy) {
|
|
@@ -48,7 +48,66 @@ var APIRoutes;
|
|
|
48
48
|
APIRoutes["Data"] = "data";
|
|
49
49
|
// Get the credits remaining for an account.
|
|
50
50
|
APIRoutes["DataCredits"] = "data/credits";
|
|
51
|
+
// AI Studio endpoints (requires AI Studio subscription)
|
|
52
|
+
APIRoutes["AICrawl"] = "ai/crawl";
|
|
53
|
+
APIRoutes["AIScrape"] = "ai/scrape";
|
|
54
|
+
APIRoutes["AISearch"] = "ai/search";
|
|
55
|
+
APIRoutes["AIBrowser"] = "ai/browser";
|
|
56
|
+
APIRoutes["AILinks"] = "ai/links";
|
|
51
57
|
})(APIRoutes || (exports.APIRoutes = APIRoutes = {}));
|
|
58
|
+
/**
|
|
59
|
+
* AI Studio subscription tiers with their rate limits (requests per second).
|
|
60
|
+
*/
|
|
61
|
+
exports.AI_STUDIO_RATE_LIMITS = {
|
|
62
|
+
starter: 1,
|
|
63
|
+
lite: 5,
|
|
64
|
+
standard: 10,
|
|
65
|
+
custom: 25,
|
|
66
|
+
};
|
|
67
|
+
/**
|
|
68
|
+
* AI Studio subscription info and URLs.
|
|
69
|
+
*/
|
|
70
|
+
exports.AIStudioInfo = {
|
|
71
|
+
/** Base URL for AI Studio */
|
|
72
|
+
baseUrl: "https://aistudio.spider.cloud",
|
|
73
|
+
/** URL for pricing/subscription page */
|
|
74
|
+
pricingUrl: "https://aistudio.spider.cloud/pricing",
|
|
75
|
+
/** URL for AI Studio documentation */
|
|
76
|
+
docsUrl: "https://aistudio.spider.cloud/docs",
|
|
77
|
+
/** Available subscription tiers */
|
|
78
|
+
tiers: {
|
|
79
|
+
starter: { price: 6, credits: 30000, rateLimit: 1 },
|
|
80
|
+
lite: { price: 30, credits: 150000, rateLimit: 5 },
|
|
81
|
+
standard: { price: 125, credits: 600000, rateLimit: 10 },
|
|
82
|
+
custom: { price: 600, credits: 3000000, rateLimit: 25 },
|
|
83
|
+
},
|
|
84
|
+
};
|
|
85
|
+
/**
|
|
86
|
+
* Error thrown when AI Studio subscription is required but not active.
|
|
87
|
+
*/
|
|
88
|
+
class AIStudioSubscriptionRequired extends Error {
|
|
89
|
+
constructor(message = "AI Studio subscription required to use /ai/* endpoints.") {
|
|
90
|
+
super(`${message}\n\nSubscribe at: ${exports.AIStudioInfo.pricingUrl}\n\nPlans start at $${exports.AIStudioInfo.tiers.starter.price}/month with ${exports.AIStudioInfo.tiers.starter.credits.toLocaleString()} credits.`);
|
|
91
|
+
this.name = "AIStudioSubscriptionRequired";
|
|
92
|
+
this.subscribeUrl = exports.AIStudioInfo.pricingUrl;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
exports.AIStudioSubscriptionRequired = AIStudioSubscriptionRequired;
|
|
96
|
+
/**
|
|
97
|
+
* Error thrown when AI Studio rate limit is exceeded.
|
|
98
|
+
*/
|
|
99
|
+
class AIStudioRateLimitExceeded extends Error {
|
|
100
|
+
constructor(retryAfterMs, currentTier) {
|
|
101
|
+
const upgradeHint = currentTier && currentTier !== "custom"
|
|
102
|
+
? `\n\nUpgrade your plan for higher rate limits: ${exports.AIStudioInfo.pricingUrl}`
|
|
103
|
+
: "";
|
|
104
|
+
super(`AI Studio rate limit exceeded. Retry after ${retryAfterMs}ms.${upgradeHint}`);
|
|
105
|
+
this.name = "AIStudioRateLimitExceeded";
|
|
106
|
+
this.retryAfterMs = retryAfterMs;
|
|
107
|
+
this.upgradeUrl = exports.AIStudioInfo.pricingUrl;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
exports.AIStudioRateLimitExceeded = AIStudioRateLimitExceeded;
|
|
52
111
|
// The base API target info for Spider Cloud.
|
|
53
112
|
exports.APISchema = {
|
|
54
113
|
url: "https://api.spider.cloud",
|
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
1
|
export { Spider } from "./client";
|
|
2
|
-
export { Collection, setBaseUrl, APISchema } from "./config";
|
|
3
|
-
export type { SpiderParams, Budget, Viewport, QueryRequest } from "./config";
|
|
2
|
+
export { Collection, setBaseUrl, APISchema, AI_STUDIO_RATE_LIMITS, AIStudioInfo, AIStudioSubscriptionRequired, AIStudioRateLimitExceeded, } from "./config";
|
|
3
|
+
export type { SpiderParams, Budget, Viewport, QueryRequest, AIRequestParams, AIStudioTier, } from "./config";
|
|
4
|
+
export { SpiderBrowser, SpiderPage } from "spider-browser";
|
|
5
|
+
export type { SpiderBrowserOptions, SpiderEvents } from "spider-browser";
|
|
6
|
+
export { Agent, act, observe, extract } from "spider-browser";
|
|
7
|
+
export type { AgentOptions, AgentResult, ObserveResult } from "spider-browser";
|
|
8
|
+
export { RetryEngine } from "spider-browser";
|
|
9
|
+
export type { RetryOptions } from "spider-browser";
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,26 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.APISchema = exports.setBaseUrl = exports.Collection = exports.Spider = void 0;
|
|
3
|
+
exports.RetryEngine = exports.extract = exports.observe = exports.act = exports.Agent = exports.SpiderPage = exports.SpiderBrowser = exports.AIStudioRateLimitExceeded = exports.AIStudioSubscriptionRequired = exports.AIStudioInfo = exports.AI_STUDIO_RATE_LIMITS = exports.APISchema = exports.setBaseUrl = exports.Collection = exports.Spider = void 0;
|
|
4
4
|
var client_1 = require("./client");
|
|
5
5
|
Object.defineProperty(exports, "Spider", { enumerable: true, get: function () { return client_1.Spider; } });
|
|
6
6
|
var config_1 = require("./config");
|
|
7
7
|
Object.defineProperty(exports, "Collection", { enumerable: true, get: function () { return config_1.Collection; } });
|
|
8
8
|
Object.defineProperty(exports, "setBaseUrl", { enumerable: true, get: function () { return config_1.setBaseUrl; } });
|
|
9
9
|
Object.defineProperty(exports, "APISchema", { enumerable: true, get: function () { return config_1.APISchema; } });
|
|
10
|
+
Object.defineProperty(exports, "AI_STUDIO_RATE_LIMITS", { enumerable: true, get: function () { return config_1.AI_STUDIO_RATE_LIMITS; } });
|
|
11
|
+
Object.defineProperty(exports, "AIStudioInfo", { enumerable: true, get: function () { return config_1.AIStudioInfo; } });
|
|
12
|
+
Object.defineProperty(exports, "AIStudioSubscriptionRequired", { enumerable: true, get: function () { return config_1.AIStudioSubscriptionRequired; } });
|
|
13
|
+
Object.defineProperty(exports, "AIStudioRateLimitExceeded", { enumerable: true, get: function () { return config_1.AIStudioRateLimitExceeded; } });
|
|
14
|
+
// Browser automation
|
|
15
|
+
var spider_browser_1 = require("spider-browser");
|
|
16
|
+
Object.defineProperty(exports, "SpiderBrowser", { enumerable: true, get: function () { return spider_browser_1.SpiderBrowser; } });
|
|
17
|
+
Object.defineProperty(exports, "SpiderPage", { enumerable: true, get: function () { return spider_browser_1.SpiderPage; } });
|
|
18
|
+
// Browser AI
|
|
19
|
+
var spider_browser_2 = require("spider-browser");
|
|
20
|
+
Object.defineProperty(exports, "Agent", { enumerable: true, get: function () { return spider_browser_2.Agent; } });
|
|
21
|
+
Object.defineProperty(exports, "act", { enumerable: true, get: function () { return spider_browser_2.act; } });
|
|
22
|
+
Object.defineProperty(exports, "observe", { enumerable: true, get: function () { return spider_browser_2.observe; } });
|
|
23
|
+
Object.defineProperty(exports, "extract", { enumerable: true, get: function () { return spider_browser_2.extract; } });
|
|
24
|
+
// Browser retry & stealth
|
|
25
|
+
var spider_browser_3 = require("spider-browser");
|
|
26
|
+
Object.defineProperty(exports, "RetryEngine", { enumerable: true, get: function () { return spider_browser_3.RetryEngine; } });
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@spider-cloud/spider-client",
|
|
3
|
-
"version": "0.1.85",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Isomorphic Javascript SDK for Spider Cloud services",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"test": "node --import tsx --test __tests__/*test.ts",
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
"prepublishOnly": "npm test && npm run build"
|
|
9
9
|
},
|
|
10
10
|
"main": "dist/index.js",
|
|
11
|
-
"types": "dist/
|
|
11
|
+
"types": "dist/index.d.ts",
|
|
12
12
|
"files": [
|
|
13
13
|
"dist/**/*"
|
|
14
14
|
],
|
|
@@ -17,6 +17,9 @@
|
|
|
17
17
|
"sdk",
|
|
18
18
|
"web crawling",
|
|
19
19
|
"web scraping",
|
|
20
|
+
"browser automation",
|
|
21
|
+
"cdp",
|
|
22
|
+
"headless",
|
|
20
23
|
"api",
|
|
21
24
|
"llm scraping"
|
|
22
25
|
],
|
|
@@ -29,6 +32,7 @@
|
|
|
29
32
|
"typescript": "5.7.3"
|
|
30
33
|
},
|
|
31
34
|
"dependencies": {
|
|
32
|
-
"exponential-backoff": "^3.1.2"
|
|
35
|
+
"exponential-backoff": "^3.1.2",
|
|
36
|
+
"spider-browser": "^0.2.3"
|
|
33
37
|
}
|
|
34
38
|
}
|