@spider-cloud/spider-client 0.1.75 → 0.1.79
This diff shows the contents of publicly released package versions from one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +0 -48
- package/dist/client.d.ts +1 -48
- package/dist/client.js +0 -100
- package/dist/config.d.ts +11 -31
- package/dist/config.js +0 -8
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -85,51 +85,6 @@ const streamCallback = (data) => {
|
|
|
85
85
|
app.crawlUrl(url, crawlParams, stream, streamCallback);
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
-
### Data Operations
|
|
89
|
-
|
|
90
|
-
The Spider client can interact with specific data tables to create, retrieve, and delete data.
|
|
91
|
-
|
|
92
|
-
#### Retrieve Data from a Table
|
|
93
|
-
|
|
94
|
-
To fetch data from a specified table by applying query parameters, use the `getData` method. Provide the table name and an object containing query parameters:
|
|
95
|
-
|
|
96
|
-
```javascript
|
|
97
|
-
const tableName = "pages";
|
|
98
|
-
const queryParams = { limit: 20 };
|
|
99
|
-
spider
|
|
100
|
-
.getData(tableName, queryParams)
|
|
101
|
-
.then((response) => console.log(response))
|
|
102
|
-
.catch((error) => console.error(error));
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
This example retrieves data from the 'pages' table, limiting the results to 20 entries.
|
|
106
|
-
|
|
107
|
-
#### Delete Data from a Table
|
|
108
|
-
|
|
109
|
-
To delete data from a specified table based on certain conditions, use the `deleteData` method. Provide the table name and an object specifying the conditions for deletion:
|
|
110
|
-
|
|
111
|
-
```javascript
|
|
112
|
-
const tableName = "websites";
|
|
113
|
-
const deleteParams = { domain: "www.example.com" };
|
|
114
|
-
spider
|
|
115
|
-
.deleteData(tableName, deleteParams)
|
|
116
|
-
.then((response) => console.log(response))
|
|
117
|
-
.catch((error) => console.error(error));
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
#### Download storage data
|
|
121
|
-
|
|
122
|
-
To download stored data like raw HTML or markdown use the `createSignedUrl` method. Provide the website name and an object containing query parameters:
|
|
123
|
-
|
|
124
|
-
```javascript
|
|
125
|
-
const websiteName = "spider.cloud";
|
|
126
|
-
const queryParams = { limit: 20, page: 0 };
|
|
127
|
-
spider
|
|
128
|
-
.createSignedUrl(websiteName, queryParams)
|
|
129
|
-
.then((response) => console.log(response))
|
|
130
|
-
.catch((error) => console.error(error));
|
|
131
|
-
```
|
|
132
|
-
|
|
133
88
|
### Available Methods
|
|
134
89
|
|
|
135
90
|
- **`scrapeUrl(url, params)`**: Scrape data from a specified URL. Optional parameters can be passed to customize the scraping behavior.
|
|
@@ -139,9 +94,6 @@ spider
|
|
|
139
94
|
- **`screenshot(url, params)`**: Take a screenshot of the specified URL.
|
|
140
95
|
- **`transform(data, params)`**: Perform a fast HTML transformation to markdown or text.
|
|
141
96
|
- **`getCredits()`**: Retrieve account's remaining credits.
|
|
142
|
-
- **`getData(table, params)`**: Retrieve data records from the DB.
|
|
143
|
-
- **`deleteData(table, params)`**: Delete records from the DB.
|
|
144
|
-
- **`createSignedUrl(domain, params)`**: Download the records from the DB.
|
|
145
97
|
|
|
146
98
|
## Error Handling
|
|
147
99
|
|
package/dist/client.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
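import { ChunkCallbackFunction, Collection, QueryRequest, SpiderCoreResponse, SpiderParams, SearchRequestParams, RequestParamsTransform } from "./config";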
|
|
1
|
+
import { ChunkCallbackFunction, Collection, SpiderCoreResponse, SpiderParams, SearchRequestParams, RequestParamsTransform } from "./config";
|
|
2
2
|
/**
|
|
3
3
|
* Generic params for core request.
|
|
4
4
|
*/
|
|
@@ -34,12 +34,6 @@ export declare class Spider {
|
|
|
34
34
|
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
35
35
|
*/
|
|
36
36
|
private _apiGet;
|
|
37
|
-
/**
|
|
38
|
-
* Internal method to handle DELETE requests.
|
|
39
|
-
* @param {string} endpoint - The API endpoint from which data should be retrieved.
|
|
40
|
-
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
41
|
-
*/
|
|
42
|
-
private _apiDelete;
|
|
43
37
|
/**
|
|
44
38
|
* Scrapes data from a specified URL.
|
|
45
39
|
* @param {string} url - The URL to scrape.
|
|
@@ -89,21 +83,6 @@ export declare class Spider {
|
|
|
89
83
|
html: string;
|
|
90
84
|
url?: string;
|
|
91
85
|
}[], params?: RequestParamsTransform): Promise<any>;
|
|
92
|
-
/**
|
|
93
|
-
* Create a signed url to download files from the storage.
|
|
94
|
-
* @param {string} [domain] - The domain for the user's storage. If not provided, downloads all files.
|
|
95
|
-
* @param {Object} [options] - The download options.
|
|
96
|
-
* @param {boolean} [raw] - Return the raw response.
|
|
97
|
-
|
|
98
|
-
* @returns {Promise<Response>} The response containing the file stream.
|
|
99
|
-
*/
|
|
100
|
-
createSignedUrl(url?: string, options?: {
|
|
101
|
-
page?: number;
|
|
102
|
-
limit?: number;
|
|
103
|
-
expiresIn?: number;
|
|
104
|
-
domain?: string;
|
|
105
|
-
pathname?: string;
|
|
106
|
-
}): Promise<any>;
|
|
107
86
|
/**
|
|
108
87
|
* Retrieves the number of credits available on the account.
|
|
109
88
|
* @returns {Promise<any>} The current credit balance.
|
|
@@ -116,32 +95,6 @@ export declare class Spider {
|
|
|
116
95
|
* @returns {Promise<any>} The response from the server.
|
|
117
96
|
*/
|
|
118
97
|
postData(collection: Collection, data: GenericParams | Record<string, any>): Promise<any>;
|
|
119
|
-
/**
|
|
120
|
-
* Send a GET request to retrieve data from a specified table.
|
|
121
|
-
* @param {Collection} table - The table name in the database.
|
|
122
|
-
* @param {object} params - The query parameters for data retrieval.
|
|
123
|
-
* @returns {Promise<any>} The response from the server.
|
|
124
|
-
*/
|
|
125
|
-
getData(collections: Collection, params: GenericParams | Record<string, any>): Promise<any>;
|
|
126
|
-
/**
|
|
127
|
-
* Download a record. The url is the path of the storage hash returned and not the exact website url.
|
|
128
|
-
* @param {QueryRequest} params - The query parameters for data retrieval.
|
|
129
|
-
* @returns {Promise<any>} The download response from the server.
|
|
130
|
-
*/
|
|
131
|
-
download(query: QueryRequest, output?: "text" | "blob"): Promise<any>;
|
|
132
|
-
/**
|
|
133
|
-
* Perform a query to get a document.
|
|
134
|
-
* @param {QueryRequest} params - The query parameters for data retrieval.
|
|
135
|
-
* @returns {Promise<any>} The response from the server.
|
|
136
|
-
*/
|
|
137
|
-
query(query: QueryRequest): Promise<any>;
|
|
138
|
-
/**
|
|
139
|
-
* Send a DELETE request to remove data from a specified table.
|
|
140
|
-
* @param {Collection} table - The table name in the database.
|
|
141
|
-
* @param {object} params - Parameters to identify records to delete.
|
|
142
|
-
* @returns {Promise<any>} The response from the server.
|
|
143
|
-
*/
|
|
144
|
-
deleteData(collection: Collection, params: GenericParams | Record<string, any>): Promise<any>;
|
|
145
98
|
/**
|
|
146
99
|
* Prepares common headers for each API request.
|
|
147
100
|
* @returns {HeadersInit} A headers object for fetch requests.
|
package/dist/client.js
CHANGED
|
@@ -67,27 +67,6 @@ class Spider {
|
|
|
67
67
|
this.handleError(response, `get from ${endpoint}`);
|
|
68
68
|
}
|
|
69
69
|
}
|
|
70
|
-
/**
|
|
71
|
-
* Internal method to handle DELETE requests.
|
|
72
|
-
* @param {string} endpoint - The API endpoint from which data should be retrieved.
|
|
73
|
-
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
74
|
-
*/
|
|
75
|
-
async _apiDelete(endpoint) {
|
|
76
|
-
const headers = this.prepareHeaders;
|
|
77
|
-
const response = await (0, exponential_backoff_1.backOff)(() => fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
|
|
78
|
-
method: "DELETE",
|
|
79
|
-
headers,
|
|
80
|
-
body: JSON.stringify({}),
|
|
81
|
-
}), {
|
|
82
|
-
numOfAttempts: 5,
|
|
83
|
-
});
|
|
84
|
-
if (response.ok) {
|
|
85
|
-
return response;
|
|
86
|
-
}
|
|
87
|
-
else {
|
|
88
|
-
return this.handleError(response, `delete from ${endpoint}`);
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
70
|
/**
|
|
92
71
|
* Scrapes data from a specified URL.
|
|
93
72
|
* @param {string} url - The URL to scrape.
|
|
@@ -162,37 +141,6 @@ class Spider {
|
|
|
162
141
|
: data,
|
|
163
142
|
});
|
|
164
143
|
}
|
|
165
|
-
/**
|
|
166
|
-
* Create a signed url to download files from the storage.
|
|
167
|
-
* @param {string} [domain] - The domain for the user's storage. If not provided, downloads all files.
|
|
168
|
-
* @param {Object} [options] - The download options.
|
|
169
|
-
* @param {boolean} [raw] - Return the raw response.
|
|
170
|
-
|
|
171
|
-
* @returns {Promise<Response>} The response containing the file stream.
|
|
172
|
-
*/
|
|
173
|
-
async createSignedUrl(url, options) {
|
|
174
|
-
const { page, limit, expiresIn, domain, pathname } = options !== null && options !== void 0 ? options : {};
|
|
175
|
-
const params = new URLSearchParams({
|
|
176
|
-
...(url && { url }),
|
|
177
|
-
...(domain && { domain }),
|
|
178
|
-
...(pathname && { pathname }),
|
|
179
|
-
...(page && { page: page.toString() }),
|
|
180
|
-
...(limit && { limit: limit.toString() }),
|
|
181
|
-
...(expiresIn && { expiresIn: expiresIn.toString() }),
|
|
182
|
-
});
|
|
183
|
-
const endpoint = `${config_1.APISchema["url"]}/${config_1.APIRoutes.DataSignUrl}?${params.toString()}`;
|
|
184
|
-
const headers = this.prepareHeaders;
|
|
185
|
-
const response = await fetch(endpoint, {
|
|
186
|
-
method: "GET",
|
|
187
|
-
headers,
|
|
188
|
-
});
|
|
189
|
-
if (response.ok) {
|
|
190
|
-
return await response.json();
|
|
191
|
-
}
|
|
192
|
-
else {
|
|
193
|
-
this.handleError(response, `Failed to sign files`);
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
144
|
/**
|
|
197
145
|
* Retrieves the number of credits available on the account.
|
|
198
146
|
* @returns {Promise<any>} The current credit balance.
|
|
@@ -209,54 +157,6 @@ class Spider {
|
|
|
209
157
|
async postData(collection, data) {
|
|
210
158
|
return this._apiPost(`${config_1.APIRoutes.Data}/${collection}`, data);
|
|
211
159
|
}
|
|
212
|
-
/**
|
|
213
|
-
* Send a GET request to retrieve data from a specified table.
|
|
214
|
-
* @param {Collection} table - The table name in the database.
|
|
215
|
-
* @param {object} params - The query parameters for data retrieval.
|
|
216
|
-
* @returns {Promise<any>} The response from the server.
|
|
217
|
-
*/
|
|
218
|
-
async getData(collections, params) {
|
|
219
|
-
return this._apiGet(`${config_1.APIRoutes.Data}/${collections}?${new URLSearchParams(params).toString()}`);
|
|
220
|
-
}
|
|
221
|
-
/**
|
|
222
|
-
* Download a record. The url is the path of the storage hash returned and not the exact website url.
|
|
223
|
-
* @param {QueryRequest} params - The query parameters for data retrieval.
|
|
224
|
-
* @returns {Promise<any>} The download response from the server.
|
|
225
|
-
*/
|
|
226
|
-
async download(query, output) {
|
|
227
|
-
const headers = this.prepareHeaders;
|
|
228
|
-
const endpoint = `${config_1.APIRoutes.DataDownload}?${new URLSearchParams(query).toString()}`;
|
|
229
|
-
const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
|
|
230
|
-
method: "GET",
|
|
231
|
-
headers,
|
|
232
|
-
});
|
|
233
|
-
if (response.ok) {
|
|
234
|
-
if (output === "text") {
|
|
235
|
-
return await response.text();
|
|
236
|
-
}
|
|
237
|
-
return await response.blob();
|
|
238
|
-
}
|
|
239
|
-
else {
|
|
240
|
-
this.handleError(response, `get from ${endpoint}`);
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
|
-
/**
|
|
244
|
-
* Perform a query to get a document.
|
|
245
|
-
* @param {QueryRequest} params - The query parameters for data retrieval.
|
|
246
|
-
* @returns {Promise<any>} The response from the server.
|
|
247
|
-
*/
|
|
248
|
-
async query(query) {
|
|
249
|
-
return this._apiGet(`${config_1.APIRoutes.DataQuery}?${new URLSearchParams(query).toString()}`);
|
|
250
|
-
}
|
|
251
|
-
/**
|
|
252
|
-
* Send a DELETE request to remove data from a specified table.
|
|
253
|
-
* @param {Collection} table - The table name in the database.
|
|
254
|
-
* @param {object} params - Parameters to identify records to delete.
|
|
255
|
-
* @returns {Promise<any>} The response from the server.
|
|
256
|
-
*/
|
|
257
|
-
async deleteData(collection, params) {
|
|
258
|
-
return this._apiDelete(`${config_1.APIRoutes.Data}/${collection}?${new URLSearchParams(params).toString()}`);
|
|
259
|
-
}
|
|
260
160
|
/**
|
|
261
161
|
* Prepares common headers for each API request.
|
|
262
162
|
* @returns {HeadersInit} A headers object for fetch requests.
|
package/dist/config.d.ts
CHANGED
|
@@ -135,84 +135,72 @@ type CSSExtractionMap = {
|
|
|
135
135
|
[path: string]: CSSSelector[];
|
|
136
136
|
};
|
|
137
137
|
export type Evaluate = {
|
|
138
|
-
type: "Evaluate";
|
|
139
138
|
/** Rust: Evaluate(String) */
|
|
140
139
|
code: string;
|
|
141
140
|
};
|
|
142
141
|
export type Click = {
|
|
143
|
-
type: "Click";
|
|
144
142
|
/** Rust: Click(String) */
|
|
145
143
|
selector: string;
|
|
146
144
|
};
|
|
147
145
|
export type ClickAll = {
|
|
148
|
-
type: "ClickAll";
|
|
149
146
|
/** Rust: ClickAll(String) */
|
|
150
147
|
selector: string;
|
|
151
148
|
};
|
|
152
|
-
export type ClickAllClickable = {
|
|
153
|
-
|
|
149
|
+
export type ClickAllClickable = {};
|
|
150
|
+
export type ClickPoint = {
|
|
151
|
+
x: number;
|
|
152
|
+
y: number;
|
|
154
153
|
};
|
|
155
154
|
export type Wait = {
|
|
156
|
-
type: "Wait";
|
|
157
155
|
/** Rust: u64 (milliseconds) */
|
|
158
156
|
ms: number;
|
|
159
157
|
};
|
|
160
|
-
export type WaitForNavigation = {
|
|
161
|
-
type: "WaitForNavigation";
|
|
162
|
-
};
|
|
158
|
+
export type WaitForNavigation = {};
|
|
163
159
|
export type WaitForDom = {
|
|
164
|
-
type: "WaitForDom";
|
|
165
160
|
/** Rust: Option<String> */
|
|
166
161
|
selector?: string | null;
|
|
167
162
|
/** Rust: u32 (milliseconds) */
|
|
168
163
|
timeout: number;
|
|
169
164
|
};
|
|
170
165
|
export type WaitFor = {
|
|
171
|
-
type: "WaitFor";
|
|
172
|
-
/** Rust: String */
|
|
173
166
|
selector: string;
|
|
174
167
|
};
|
|
175
168
|
export type WaitForWithTimeout = {
|
|
176
|
-
type: "WaitForWithTimeout";
|
|
177
169
|
selector: string;
|
|
178
170
|
/** Rust: u64 (milliseconds) */
|
|
179
171
|
timeout: number;
|
|
180
172
|
};
|
|
181
173
|
export type WaitForAndClick = {
|
|
182
|
-
type: "WaitForAndClick";
|
|
183
174
|
selector: string;
|
|
184
175
|
};
|
|
185
176
|
export type ScrollX = {
|
|
186
|
-
type: "ScrollX";
|
|
187
177
|
/** Rust: i32 (pixels) */
|
|
188
178
|
dx: number;
|
|
189
179
|
};
|
|
190
180
|
export type ScrollY = {
|
|
191
|
-
type: "ScrollY";
|
|
192
181
|
/** Rust: i32 (pixels) */
|
|
193
182
|
dy: number;
|
|
194
183
|
};
|
|
195
184
|
export type Fill = {
|
|
196
|
-
type: "Fill";
|
|
197
185
|
selector: string;
|
|
198
186
|
value: string;
|
|
199
187
|
};
|
|
188
|
+
export type Type = {
|
|
189
|
+
modifier: number;
|
|
190
|
+
value: string;
|
|
191
|
+
};
|
|
200
192
|
export type InfiniteScroll = {
|
|
201
|
-
type: "InfiniteScroll";
|
|
202
193
|
/** Rust: u32 (pixels/step or count—match your semantics) */
|
|
203
194
|
step_px: number;
|
|
204
195
|
};
|
|
205
196
|
export type Screenshot = {
|
|
206
|
-
type: "Screenshot";
|
|
207
197
|
/** Keep snake_case to match Rust JSON if interop is needed */
|
|
208
198
|
full_page: boolean;
|
|
209
199
|
omit_background: boolean;
|
|
210
200
|
output: string;
|
|
211
201
|
};
|
|
212
|
-
export type ValidateChain = {
|
|
213
|
-
|
|
214
|
-
};
|
|
215
|
-
export type WebAutomation = Evaluate | Click | ClickAll | ClickAllClickable | Wait | WaitForNavigation | WaitForDom | WaitFor | WaitForWithTimeout | WaitForAndClick | ScrollX | ScrollY | Fill | InfiniteScroll | Screenshot | ValidateChain;
|
|
202
|
+
export type ValidateChain = {};
|
|
203
|
+
export type WebAutomation = Evaluate | Click | ClickAll | ClickAllClickable | ClickPoint | Wait | WaitForNavigation | WaitForDom | WaitFor | WaitForWithTimeout | WaitForAndClick | ScrollX | ScrollY | Fill | Type | InfiniteScroll | Screenshot | ValidateChain;
|
|
216
204
|
export type ReturnFormat = "markdown" | "commonmark" | "raw" | "screenshot" | "text" | "html2text" | "bytes" | "xml" | "empty";
|
|
217
205
|
export type WebAutomationMap = Record<string, WebAutomation[]>;
|
|
218
206
|
export type ExecutionScriptsMap = Record<string, string>;
|
|
@@ -299,10 +287,6 @@ export interface SpiderParams {
|
|
|
299
287
|
* The headers to be used for the request.
|
|
300
288
|
*/
|
|
301
289
|
headers?: Headers;
|
|
302
|
-
/**
|
|
303
|
-
* Specifies whether anti-bot measures should be used.
|
|
304
|
-
*/
|
|
305
|
-
anti_bot?: boolean;
|
|
306
290
|
/**
|
|
307
291
|
* Specifies whether to include metadata in the response.
|
|
308
292
|
*/
|
|
@@ -554,10 +538,6 @@ export declare enum APIRoutes {
|
|
|
554
538
|
Search = "search",
|
|
555
539
|
Transform = "transform",
|
|
556
540
|
Data = "data",
|
|
557
|
-
DataCrawlState = "data/crawl_state",
|
|
558
|
-
DataSignUrl = "data/sign-url",
|
|
559
|
-
DataDownload = "data/download",
|
|
560
|
-
DataQuery = "data/query",
|
|
561
541
|
DataCredits = "data/credits"
|
|
562
542
|
}
|
|
563
543
|
export declare const APISchema: {
|
package/dist/config.js
CHANGED
|
@@ -42,14 +42,6 @@ var APIRoutes;
|
|
|
42
42
|
APIRoutes["Transform"] = "transform";
|
|
43
43
|
// Dynamic collection routes.
|
|
44
44
|
APIRoutes["Data"] = "data";
|
|
45
|
-
// The last crawl state of a website.
|
|
46
|
-
APIRoutes["DataCrawlState"] = "data/crawl_state";
|
|
47
|
-
// Sign a file from storage based on the exact url path of the storage or domain - pathname.
|
|
48
|
-
APIRoutes["DataSignUrl"] = "data/sign-url";
|
|
49
|
-
// Download a file from storage based on the exact url path of the storage or domain - pathname.
|
|
50
|
-
APIRoutes["DataDownload"] = "data/download";
|
|
51
|
-
// Perform a query on the global database to grab content without crawling if available.
|
|
52
|
-
APIRoutes["DataQuery"] = "data/query";
|
|
53
45
|
// Get the credits remaining for an account.
|
|
54
46
|
APIRoutes["DataCredits"] = "data/credits";
|
|
55
47
|
})(APIRoutes || (exports.APIRoutes = APIRoutes = {}));
|