@spider-cloud/spider-client 0.0.20 → 0.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/client.d.ts +23 -2
- package/dist/client.js +39 -42
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -95,8 +95,10 @@ spider
|
|
|
95
95
|
|
|
96
96
|
- **`scrapeUrl(url, params)`**: Scrape data from a specified URL. Optional parameters can be passed to customize the scraping behavior.
|
|
97
97
|
- **`crawlUrl(url, params, stream)`**: Begin crawling from a specific URL with optional parameters for customization and an optional streaming response.
|
|
98
|
+
- **`search(q, params)`**: Perform a search and gather a list of websites to start crawling and collect resources.
|
|
98
99
|
- **`links(url, params)`**: Retrieve all links from the specified URL with optional parameters.
|
|
99
100
|
- **`screenshot(url, params)`**: Take a screenshot of the specified URL.
|
|
101
|
+
- **`transform(data, params)`**: Perform a fast HTML transformation to markdown or text.
|
|
100
102
|
- **`extractContacts(url, params)`**: Extract contact information from the specified URL.
|
|
101
103
|
- **`label(url, params)`**: Apply labeling to data extracted from the specified URL.
|
|
102
104
|
- **`getCrawlState(url, params)`**: Check the website crawl state.
|
package/dist/client.d.ts
CHANGED
|
@@ -9,7 +9,6 @@ export interface SpiderConfig {
|
|
|
9
9
|
*/
|
|
10
10
|
export declare class Spider {
|
|
11
11
|
private apiKey?;
|
|
12
|
-
private dataEndPoint;
|
|
13
12
|
/**
|
|
14
13
|
* Create an instance of Spider.
|
|
15
14
|
* @param {string | null} apiKey - The API key used to authenticate to the Spider API. If null, attempts to source from environment variables.
|
|
@@ -30,6 +29,12 @@ export declare class Spider {
|
|
|
30
29
|
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
31
30
|
*/
|
|
32
31
|
private _apiGet;
|
|
32
|
+
/**
|
|
33
|
+
* Internal method to handle DELETE requests.
|
|
34
|
+
* @param {string} endpoint - The API endpoint to send the DELETE request to.
|
|
35
|
+
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
36
|
+
*/
|
|
37
|
+
private _apiDelete;
|
|
33
38
|
/**
|
|
34
39
|
* Scrapes data from a specified URL.
|
|
35
40
|
* @param {string} url - The URL to scrape.
|
|
@@ -59,6 +64,23 @@ export declare class Spider {
|
|
|
59
64
|
* @returns {Promise<any>} The screenshot data.
|
|
60
65
|
*/
|
|
61
66
|
screenshot(url: string, params?: {}): Promise<any>;
|
|
67
|
+
/**
|
|
68
|
+
* Perform a search and gather a list of websites to start crawling and collect resources.
|
|
69
|
+
* @param {string} q - The search query.
|
|
70
|
+
* @param {object} [params={}] - Configuration parameters for the search.
|
|
71
|
+
* @returns {Promise<any>} The response from the search endpoint.
|
|
72
|
+
*/
|
|
73
|
+
search(q: string, params?: {}): Promise<any>;
|
|
74
|
+
/**
|
|
75
|
+
* Transform HTML to Markdown or text. You can send up to 10MB of data at once.
|
|
76
|
+
* @param {object} data - The data to transform: a list of objects, each with an 'html' key and an optional 'url' key for readability.
|
|
77
|
+
* @param {object} [params={}] - Configuration parameters for the transformation.
|
|
78
|
+
* @returns {Promise<any>} The transformation result.
|
|
79
|
+
*/
|
|
80
|
+
transform(data: {
|
|
81
|
+
html: string;
|
|
82
|
+
url?: string;
|
|
83
|
+
}[], params?: {}): Promise<any>;
|
|
62
84
|
/**
|
|
63
85
|
* Extracts contact information from the specified URL.
|
|
64
86
|
* @param {string} url - The URL from which to extract contacts.
|
|
@@ -106,7 +128,6 @@ export declare class Spider {
|
|
|
106
128
|
* @returns {Promise<any>} The response from the server.
|
|
107
129
|
*/
|
|
108
130
|
deleteData(table: string, params: object): Promise<any>;
|
|
109
|
-
private _apiDataPost;
|
|
110
131
|
/**
|
|
111
132
|
* Prepares common headers for each API request.
|
|
112
133
|
* @returns {HeadersInit} A headers object for fetch requests.
|
package/dist/client.js
CHANGED
|
@@ -12,7 +12,6 @@ class Spider {
|
|
|
12
12
|
*/
|
|
13
13
|
constructor(props) {
|
|
14
14
|
var _a;
|
|
15
|
-
this.dataEndPoint = "https://api.spider.cloud:3280";
|
|
16
15
|
this.apiKey = (props === null || props === void 0 ? void 0 : props.apiKey) || ((_a = process === null || process === void 0 ? void 0 : process.env) === null || _a === void 0 ? void 0 : _a.SPIDER_API_KEY);
|
|
17
16
|
if (!this.apiKey) {
|
|
18
17
|
throw new Error("No API key provided");
|
|
@@ -60,6 +59,24 @@ class Spider {
|
|
|
60
59
|
this.handleError(response, `get from ${endpoint}`);
|
|
61
60
|
}
|
|
62
61
|
}
|
|
62
|
+
/**
|
|
63
|
+
* Internal method to handle DELETE requests.
|
|
64
|
+
* @param {string} endpoint - The API endpoint to send the DELETE request to.
|
|
65
|
+
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
66
|
+
*/
|
|
67
|
+
async _apiDelete(endpoint) {
|
|
68
|
+
const headers = this.prepareHeaders();
|
|
69
|
+
const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
|
|
70
|
+
method: "DELETE",
|
|
71
|
+
headers,
|
|
72
|
+
});
|
|
73
|
+
if (response.ok) {
|
|
74
|
+
return response.json();
|
|
75
|
+
}
|
|
76
|
+
else {
|
|
77
|
+
this.handleError(response, `get from ${endpoint}`);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
63
80
|
/**
|
|
64
81
|
* Scrapes data from a specified URL.
|
|
65
82
|
* @param {string} url - The URL to scrape.
|
|
@@ -97,6 +114,24 @@ class Spider {
|
|
|
97
114
|
async screenshot(url, params = {}) {
|
|
98
115
|
return this._apiPost("screenshot", { url: url, ...params });
|
|
99
116
|
}
|
|
117
|
+
/**
|
|
118
|
+
* Perform a search and gather a list of websites to start crawling and collect resources.
|
|
119
|
+
* @param {string} q - The search query.
|
|
120
|
+
* @param {object} [params={}] - Configuration parameters for the search.
|
|
121
|
+
* @returns {Promise<any>} The response from the search endpoint.
|
|
122
|
+
*/
|
|
123
|
+
async search(q, params = {}) {
|
|
124
|
+
return this._apiPost("search", { search: q, ...params });
|
|
125
|
+
}
|
|
126
|
+
/**
|
|
127
|
+
* Transform HTML to Markdown or text. You can send up to 10MB of data at once.
|
|
128
|
+
* @param {object} data - The data to transform: a list of objects, each with an 'html' key and an optional 'url' key for readability.
|
|
129
|
+
* @param {object} [params={}] - Configuration parameters for the transformation.
|
|
130
|
+
* @returns {Promise<any>} The transformation result.
|
|
131
|
+
*/
|
|
132
|
+
async transform(data, params = {}) {
|
|
133
|
+
return this._apiPost("transform", { data, ...params });
|
|
134
|
+
}
|
|
100
135
|
/**
|
|
101
136
|
* Extracts contact information from the specified URL.
|
|
102
137
|
* @param {string} url - The URL from which to extract contacts.
|
|
@@ -138,8 +173,7 @@ class Spider {
|
|
|
138
173
|
* @returns {Promise<any>} The response from the server.
|
|
139
174
|
*/
|
|
140
175
|
async postData(table, data) {
|
|
141
|
-
|
|
142
|
-
return this._apiDataPost(endpoint, data);
|
|
176
|
+
return this._apiPost(`data/${table}`, data);
|
|
143
177
|
}
|
|
144
178
|
/**
|
|
145
179
|
* Send a GET request to retrieve data from a specified table.
|
|
@@ -148,18 +182,7 @@ class Spider {
|
|
|
148
182
|
* @returns {Promise<any>} The response from the server.
|
|
149
183
|
*/
|
|
150
184
|
async getData(table, params) {
|
|
151
|
-
|
|
152
|
-
const headers = this.prepareHeaders();
|
|
153
|
-
const response = await fetch(`${this.dataEndPoint}${endpoint}`, {
|
|
154
|
-
method: "GET",
|
|
155
|
-
headers: headers,
|
|
156
|
-
});
|
|
157
|
-
if (response.ok) {
|
|
158
|
-
return response.json();
|
|
159
|
-
}
|
|
160
|
-
else {
|
|
161
|
-
this.handleError(response, `get data from ${table}`);
|
|
162
|
-
}
|
|
185
|
+
return this._apiGet(`data/${table}?${new URLSearchParams(params).toString()}`);
|
|
163
186
|
}
|
|
164
187
|
/**
|
|
165
188
|
* Send a DELETE request to remove data from a specified table.
|
|
@@ -168,33 +191,7 @@ class Spider {
|
|
|
168
191
|
* @returns {Promise<any>} The response from the server.
|
|
169
192
|
*/
|
|
170
193
|
async deleteData(table, params) {
|
|
171
|
-
|
|
172
|
-
const headers = this.prepareHeaders();
|
|
173
|
-
const response = await fetch(`${this.dataEndPoint}${endpoint}`, {
|
|
174
|
-
method: "DELETE",
|
|
175
|
-
headers,
|
|
176
|
-
});
|
|
177
|
-
if (response.ok) {
|
|
178
|
-
return response.json();
|
|
179
|
-
}
|
|
180
|
-
else {
|
|
181
|
-
this.handleError(response, `delete data from ${table}`);
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
// Create wrapper methods for external API access:
|
|
185
|
-
async _apiDataPost(endpoint, data) {
|
|
186
|
-
const headers = this.prepareHeaders();
|
|
187
|
-
const response = await fetch(`${this.dataEndPoint}${endpoint}`, {
|
|
188
|
-
method: "POST",
|
|
189
|
-
headers,
|
|
190
|
-
body: JSON.stringify(data),
|
|
191
|
-
});
|
|
192
|
-
if (response.ok) {
|
|
193
|
-
return response.json();
|
|
194
|
-
}
|
|
195
|
-
else {
|
|
196
|
-
this.handleError(response, `post to ${endpoint}`);
|
|
197
|
-
}
|
|
194
|
+
return this._apiDelete(`data/${table}?${new URLSearchParams(params).toString()}`);
|
|
198
195
|
}
|
|
199
196
|
/**
|
|
200
197
|
* Prepares common headers for each API request.
|