@spider-cloud/spider-client 0.1.77 → 0.1.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -48
- package/dist/client.d.ts +1 -48
- package/dist/client.js +0 -100
- package/dist/config.d.ts +0 -8
- package/dist/config.js +0 -8
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -85,51 +85,6 @@ const streamCallback = (data) => {
|
|
|
85
85
|
app.crawlUrl(url, crawlParams, stream, streamCallback);
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
-
### Data Operations
|
|
89
|
-
|
|
90
|
-
The Spider client can interact with specific data tables to create, retrieve, and delete data.
|
|
91
|
-
|
|
92
|
-
#### Retrieve Data from a Table
|
|
93
|
-
|
|
94
|
-
To fetch data from a specified table by applying query parameters, use the `getData` method. Provide the table name and an object containing query parameters:
|
|
95
|
-
|
|
96
|
-
```javascript
|
|
97
|
-
const tableName = "pages";
|
|
98
|
-
const queryParams = { limit: 20 };
|
|
99
|
-
spider
|
|
100
|
-
.getData(tableName, queryParams)
|
|
101
|
-
.then((response) => console.log(response))
|
|
102
|
-
.catch((error) => console.error(error));
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
This example retrieves data from the 'pages' table, limiting the results to 20 entries.
|
|
106
|
-
|
|
107
|
-
#### Delete Data from a Table
|
|
108
|
-
|
|
109
|
-
To delete data from a specified table based on certain conditions, use the `deleteData` method. Provide the table name and an object specifying the conditions for deletion:
|
|
110
|
-
|
|
111
|
-
```javascript
|
|
112
|
-
const tableName = "websites";
|
|
113
|
-
const deleteParams = { domain: "www.example.com" };
|
|
114
|
-
spider
|
|
115
|
-
.deleteData(tableName, deleteParams)
|
|
116
|
-
.then((response) => console.log(response))
|
|
117
|
-
.catch((error) => console.error(error));
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
#### Download storage data
|
|
121
|
-
|
|
122
|
-
To download stored data like raw HTML or markdown use the `createSignedUrl` method. Provide the website name and an object containing query parameters:
|
|
123
|
-
|
|
124
|
-
```javascript
|
|
125
|
-
const websiteName = "spider.cloud";
|
|
126
|
-
const queryParams = { limit: 20, page: 0 };
|
|
127
|
-
spider
|
|
128
|
-
.createSignedUrl(websiteName, queryParams)
|
|
129
|
-
.then((response) => console.log(response))
|
|
130
|
-
.catch((error) => console.error(error));
|
|
131
|
-
```
|
|
132
|
-
|
|
133
88
|
### Available Methods
|
|
134
89
|
|
|
135
90
|
- **`scrapeUrl(url, params)`**: Scrape data from a specified URL. Optional parameters can be passed to customize the scraping behavior.
|
|
@@ -139,9 +94,6 @@ spider
|
|
|
139
94
|
- **`screenshot(url, params)`**: Take a screenshot of the specified URL.
|
|
140
95
|
- **`transform(data, params)`**: Perform a fast HTML transformation to markdown or text.
|
|
141
96
|
- **`getCredits()`**: Retrieve account's remaining credits.
|
|
142
|
-
- **`getData(table, params)`**: Retrieve data records from the DB.
|
|
143
|
-
- **`deleteData(table, params)`**: Delete records from the DB.
|
|
144
|
-
- **`createSignedUrl(domain, params)`**: Download the records from the DB.
|
|
145
97
|
|
|
146
98
|
## Error Handling
|
|
147
99
|
|
package/dist/client.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
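import { ChunkCallbackFunction, Collection, QueryRequest, SpiderCoreResponse, SpiderParams, SearchRequestParams, RequestParamsTransform } from "./config";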
|
|
1
|
+
import { ChunkCallbackFunction, Collection, SpiderCoreResponse, SpiderParams, SearchRequestParams, RequestParamsTransform } from "./config";
|
|
2
2
|
/**
|
|
3
3
|
* Generic params for core request.
|
|
4
4
|
*/
|
|
@@ -34,12 +34,6 @@ export declare class Spider {
|
|
|
34
34
|
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
35
35
|
*/
|
|
36
36
|
private _apiGet;
|
|
37
|
-
/**
|
|
38
|
-
* Internal method to handle DELETE requests.
|
|
39
|
-
* @param {string} endpoint - The API endpoint from which data should be retrieved.
|
|
40
|
-
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
41
|
-
*/
|
|
42
|
-
private _apiDelete;
|
|
43
37
|
/**
|
|
44
38
|
* Scrapes data from a specified URL.
|
|
45
39
|
* @param {string} url - The URL to scrape.
|
|
@@ -89,21 +83,6 @@ export declare class Spider {
|
|
|
89
83
|
html: string;
|
|
90
84
|
url?: string;
|
|
91
85
|
}[], params?: RequestParamsTransform): Promise<any>;
|
|
92
|
-
/**
|
|
93
|
-
* Create a signed url to download files from the storage.
|
|
94
|
-
* @param {string} [domain] - The domain for the user's storage. If not provided, downloads all files.
|
|
95
|
-
* @param {Object} [options] - The download options.
|
|
96
|
-
* @param {boolean} [raw] - Return the raw response.
|
|
97
|
-
|
|
98
|
-
* @returns {Promise<Response>} The response containing the file stream.
|
|
99
|
-
*/
|
|
100
|
-
createSignedUrl(url?: string, options?: {
|
|
101
|
-
page?: number;
|
|
102
|
-
limit?: number;
|
|
103
|
-
expiresIn?: number;
|
|
104
|
-
domain?: string;
|
|
105
|
-
pathname?: string;
|
|
106
|
-
}): Promise<any>;
|
|
107
86
|
/**
|
|
108
87
|
* Retrieves the number of credits available on the account.
|
|
109
88
|
* @returns {Promise<any>} The current credit balance.
|
|
@@ -116,32 +95,6 @@ export declare class Spider {
|
|
|
116
95
|
* @returns {Promise<any>} The response from the server.
|
|
117
96
|
*/
|
|
118
97
|
postData(collection: Collection, data: GenericParams | Record<string, any>): Promise<any>;
|
|
119
|
-
/**
|
|
120
|
-
* Send a GET request to retrieve data from a specified table.
|
|
121
|
-
* @param {Collection} table - The table name in the database.
|
|
122
|
-
* @param {object} params - The query parameters for data retrieval.
|
|
123
|
-
* @returns {Promise<any>} The response from the server.
|
|
124
|
-
*/
|
|
125
|
-
getData(collections: Collection, params: GenericParams | Record<string, any>): Promise<any>;
|
|
126
|
-
/**
|
|
127
|
-
* Download a record. The url is the path of the storage hash returned and not the exact website url.
|
|
128
|
-
* @param {QueryRequest} params - The query parameters for data retrieval.
|
|
129
|
-
* @returns {Promise<any>} The download response from the server.
|
|
130
|
-
*/
|
|
131
|
-
download(query: QueryRequest, output?: "text" | "blob"): Promise<any>;
|
|
132
|
-
/**
|
|
133
|
-
* Perform a query to get a document.
|
|
134
|
-
* @param {QueryRequest} params - The query parameters for data retrieval.
|
|
135
|
-
* @returns {Promise<any>} The response from the server.
|
|
136
|
-
*/
|
|
137
|
-
query(query: QueryRequest): Promise<any>;
|
|
138
|
-
/**
|
|
139
|
-
* Send a DELETE request to remove data from a specified table.
|
|
140
|
-
* @param {Collection} table - The table name in the database.
|
|
141
|
-
* @param {object} params - Parameters to identify records to delete.
|
|
142
|
-
* @returns {Promise<any>} The response from the server.
|
|
143
|
-
*/
|
|
144
|
-
deleteData(collection: Collection, params: GenericParams | Record<string, any>): Promise<any>;
|
|
145
98
|
/**
|
|
146
99
|
* Prepares common headers for each API request.
|
|
147
100
|
* @returns {HeadersInit} A headers object for fetch requests.
|
package/dist/client.js
CHANGED
|
@@ -67,27 +67,6 @@ class Spider {
|
|
|
67
67
|
this.handleError(response, `get from ${endpoint}`);
|
|
68
68
|
}
|
|
69
69
|
}
|
|
70
|
-
/**
|
|
71
|
-
* Internal method to handle DELETE requests.
|
|
72
|
-
* @param {string} endpoint - The API endpoint from which data should be retrieved.
|
|
73
|
-
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
74
|
-
*/
|
|
75
|
-
async _apiDelete(endpoint) {
|
|
76
|
-
const headers = this.prepareHeaders;
|
|
77
|
-
const response = await (0, exponential_backoff_1.backOff)(() => fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
|
|
78
|
-
method: "DELETE",
|
|
79
|
-
headers,
|
|
80
|
-
body: JSON.stringify({}),
|
|
81
|
-
}), {
|
|
82
|
-
numOfAttempts: 5,
|
|
83
|
-
});
|
|
84
|
-
if (response.ok) {
|
|
85
|
-
return response;
|
|
86
|
-
}
|
|
87
|
-
else {
|
|
88
|
-
return this.handleError(response, `delete from ${endpoint}`);
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
70
|
/**
|
|
92
71
|
* Scrapes data from a specified URL.
|
|
93
72
|
* @param {string} url - The URL to scrape.
|
|
@@ -162,37 +141,6 @@ class Spider {
|
|
|
162
141
|
: data,
|
|
163
142
|
});
|
|
164
143
|
}
|
|
165
|
-
/**
|
|
166
|
-
* Create a signed url to download files from the storage.
|
|
167
|
-
* @param {string} [domain] - The domain for the user's storage. If not provided, downloads all files.
|
|
168
|
-
* @param {Object} [options] - The download options.
|
|
169
|
-
* @param {boolean} [raw] - Return the raw response.
|
|
170
|
-
|
|
171
|
-
* @returns {Promise<Response>} The response containing the file stream.
|
|
172
|
-
*/
|
|
173
|
-
async createSignedUrl(url, options) {
|
|
174
|
-
const { page, limit, expiresIn, domain, pathname } = options !== null && options !== void 0 ? options : {};
|
|
175
|
-
const params = new URLSearchParams({
|
|
176
|
-
...(url && { url }),
|
|
177
|
-
...(domain && { domain }),
|
|
178
|
-
...(pathname && { pathname }),
|
|
179
|
-
...(page && { page: page.toString() }),
|
|
180
|
-
...(limit && { limit: limit.toString() }),
|
|
181
|
-
...(expiresIn && { expiresIn: expiresIn.toString() }),
|
|
182
|
-
});
|
|
183
|
-
const endpoint = `${config_1.APISchema["url"]}/${config_1.APIRoutes.DataSignUrl}?${params.toString()}`;
|
|
184
|
-
const headers = this.prepareHeaders;
|
|
185
|
-
const response = await fetch(endpoint, {
|
|
186
|
-
method: "GET",
|
|
187
|
-
headers,
|
|
188
|
-
});
|
|
189
|
-
if (response.ok) {
|
|
190
|
-
return await response.json();
|
|
191
|
-
}
|
|
192
|
-
else {
|
|
193
|
-
this.handleError(response, `Failed to sign files`);
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
144
|
/**
|
|
197
145
|
* Retrieves the number of credits available on the account.
|
|
198
146
|
* @returns {Promise<any>} The current credit balance.
|
|
@@ -209,54 +157,6 @@ class Spider {
|
|
|
209
157
|
async postData(collection, data) {
|
|
210
158
|
return this._apiPost(`${config_1.APIRoutes.Data}/${collection}`, data);
|
|
211
159
|
}
|
|
212
|
-
/**
|
|
213
|
-
* Send a GET request to retrieve data from a specified table.
|
|
214
|
-
* @param {Collection} table - The table name in the database.
|
|
215
|
-
* @param {object} params - The query parameters for data retrieval.
|
|
216
|
-
* @returns {Promise<any>} The response from the server.
|
|
217
|
-
*/
|
|
218
|
-
async getData(collections, params) {
|
|
219
|
-
return this._apiGet(`${config_1.APIRoutes.Data}/${collections}?${new URLSearchParams(params).toString()}`);
|
|
220
|
-
}
|
|
221
|
-
/**
|
|
222
|
-
* Download a record. The url is the path of the storage hash returned and not the exact website url.
|
|
223
|
-
* @param {QueryRequest} params - The query parameters for data retrieval.
|
|
224
|
-
* @returns {Promise<any>} The download response from the server.
|
|
225
|
-
*/
|
|
226
|
-
async download(query, output) {
|
|
227
|
-
const headers = this.prepareHeaders;
|
|
228
|
-
const endpoint = `${config_1.APIRoutes.DataDownload}?${new URLSearchParams(query).toString()}`;
|
|
229
|
-
const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
|
|
230
|
-
method: "GET",
|
|
231
|
-
headers,
|
|
232
|
-
});
|
|
233
|
-
if (response.ok) {
|
|
234
|
-
if (output === "text") {
|
|
235
|
-
return await response.text();
|
|
236
|
-
}
|
|
237
|
-
return await response.blob();
|
|
238
|
-
}
|
|
239
|
-
else {
|
|
240
|
-
this.handleError(response, `get from ${endpoint}`);
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
|
-
/**
|
|
244
|
-
* Perform a query to get a document.
|
|
245
|
-
* @param {QueryRequest} params - The query parameters for data retrieval.
|
|
246
|
-
* @returns {Promise<any>} The response from the server.
|
|
247
|
-
*/
|
|
248
|
-
async query(query) {
|
|
249
|
-
return this._apiGet(`${config_1.APIRoutes.DataQuery}?${new URLSearchParams(query).toString()}`);
|
|
250
|
-
}
|
|
251
|
-
/**
|
|
252
|
-
* Send a DELETE request to remove data from a specified table.
|
|
253
|
-
* @param {Collection} table - The table name in the database.
|
|
254
|
-
* @param {object} params - Parameters to identify records to delete.
|
|
255
|
-
* @returns {Promise<any>} The response from the server.
|
|
256
|
-
*/
|
|
257
|
-
async deleteData(collection, params) {
|
|
258
|
-
return this._apiDelete(`${config_1.APIRoutes.Data}/${collection}?${new URLSearchParams(params).toString()}`);
|
|
259
|
-
}
|
|
260
160
|
/**
|
|
261
161
|
* Prepares common headers for each API request.
|
|
262
162
|
* @returns {HeadersInit} A headers object for fetch requests.
|
package/dist/config.d.ts
CHANGED
|
@@ -287,10 +287,6 @@ export interface SpiderParams {
|
|
|
287
287
|
* The headers to be used for the request.
|
|
288
288
|
*/
|
|
289
289
|
headers?: Headers;
|
|
290
|
-
/**
|
|
291
|
-
* Specifies whether anti-bot measures should be used.
|
|
292
|
-
*/
|
|
293
|
-
anti_bot?: boolean;
|
|
294
290
|
/**
|
|
295
291
|
* Specifies whether to include metadata in the response.
|
|
296
292
|
*/
|
|
@@ -542,10 +538,6 @@ export declare enum APIRoutes {
|
|
|
542
538
|
Search = "search",
|
|
543
539
|
Transform = "transform",
|
|
544
540
|
Data = "data",
|
|
545
|
-
DataCrawlState = "data/crawl_state",
|
|
546
|
-
DataSignUrl = "data/sign-url",
|
|
547
|
-
DataDownload = "data/download",
|
|
548
|
-
DataQuery = "data/query",
|
|
549
541
|
DataCredits = "data/credits"
|
|
550
542
|
}
|
|
551
543
|
export declare const APISchema: {
|
package/dist/config.js
CHANGED
|
@@ -42,14 +42,6 @@ var APIRoutes;
|
|
|
42
42
|
APIRoutes["Transform"] = "transform";
|
|
43
43
|
// Dynamic collection routes.
|
|
44
44
|
APIRoutes["Data"] = "data";
|
|
45
|
-
// The last crawl state of a website.
|
|
46
|
-
APIRoutes["DataCrawlState"] = "data/crawl_state";
|
|
47
|
-
// Sign a file from storage based on the exact url path of the storage or domain - pathname.
|
|
48
|
-
APIRoutes["DataSignUrl"] = "data/sign-url";
|
|
49
|
-
// Download a file from storage based on the exact url path of the storage or domain - pathname.
|
|
50
|
-
APIRoutes["DataDownload"] = "data/download";
|
|
51
|
-
// Perform a query on the global database to grab content without crawling if available.
|
|
52
|
-
APIRoutes["DataQuery"] = "data/query";
|
|
53
45
|
// Get the credits remaining for an account.
|
|
54
46
|
APIRoutes["DataCredits"] = "data/credits";
|
|
55
47
|
})(APIRoutes || (exports.APIRoutes = APIRoutes = {}));
|