@spider-cloud/spider-client 0.0.59 → 0.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.d.ts +3 -3
- package/dist/client.js +28 -24
- package/dist/config.d.ts +32 -0
- package/dist/config.js +56 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +3 -1
- package/package.json +1 -1
package/dist/client.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { ChunkCallbackFunction, Collection, QueryRequest, SpiderCoreResponse, SpiderParams } from "./config";
|
|
2
|
-
export declare const
|
|
2
|
+
export declare const baseUrl: string;
|
|
3
3
|
/**
|
|
4
4
|
* Generic params for core request.
|
|
5
5
|
*/
|
|
@@ -97,7 +97,7 @@ export declare class Spider {
|
|
|
97
97
|
url?: string;
|
|
98
98
|
}[], params?: {}): Promise<any>;
|
|
99
99
|
/**
|
|
100
|
-
* Extracts
|
|
100
|
+
* Extracts leads from a website.
|
|
101
101
|
* @param {string} url - The URL from which to extract contacts.
|
|
102
102
|
* @param {GenericParams} [params={}] - Configuration parameters for the extraction.
|
|
103
103
|
* @returns {Promise<any>} The contact information extracted.
|
|
@@ -143,7 +143,7 @@ export declare class Spider {
|
|
|
143
143
|
* @param {object} data - The data to be inserted.
|
|
144
144
|
* @returns {Promise<any>} The response from the server.
|
|
145
145
|
*/
|
|
146
|
-
postData(
|
|
146
|
+
postData(collection: Collection, data: GenericParams | Record<string, any>): Promise<any>;
|
|
147
147
|
/**
|
|
148
148
|
* Send a GET request to retrieve data from a specified table.
|
|
149
149
|
* @param {Collection} table - The table name in the database.
|
package/dist/client.js
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.Spider = exports.
|
|
3
|
+
exports.Spider = exports.baseUrl = void 0;
|
|
4
|
+
const config_1 = require("./config");
|
|
4
5
|
const package_json_1 = require("../package.json");
|
|
5
6
|
const supabase_1 = require("./supabase");
|
|
6
7
|
const stream_reader_1 = require("./utils/stream-reader");
|
|
7
|
-
exports.
|
|
8
|
+
exports.baseUrl = config_1.APISchema["url"];
|
|
8
9
|
/**
|
|
9
10
|
* A class to interact with the Spider API.
|
|
10
11
|
*/
|
|
@@ -42,7 +43,7 @@ class Spider {
|
|
|
42
43
|
*/
|
|
43
44
|
async _apiPost(endpoint, data, stream, jsonl) {
|
|
44
45
|
const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
|
|
45
|
-
const response = await fetch(`${exports.
|
|
46
|
+
const response = await fetch(`${exports.baseUrl}/v1/${endpoint}`, {
|
|
46
47
|
method: "POST",
|
|
47
48
|
headers: headers,
|
|
48
49
|
body: JSON.stringify(data),
|
|
@@ -64,7 +65,7 @@ class Spider {
|
|
|
64
65
|
*/
|
|
65
66
|
async _apiGet(endpoint) {
|
|
66
67
|
const headers = this.prepareHeaders;
|
|
67
|
-
const response = await fetch(`${exports.
|
|
68
|
+
const response = await fetch(`${exports.baseUrl}/v1/${endpoint}`, {
|
|
68
69
|
method: "GET",
|
|
69
70
|
headers: headers,
|
|
70
71
|
});
|
|
@@ -82,7 +83,7 @@ class Spider {
|
|
|
82
83
|
*/
|
|
83
84
|
async _apiDelete(endpoint) {
|
|
84
85
|
const headers = this.prepareHeaders;
|
|
85
|
-
const response = await fetch(`${exports.
|
|
86
|
+
const response = await fetch(`${exports.baseUrl}/v1/${endpoint}`, {
|
|
86
87
|
method: "DELETE",
|
|
87
88
|
headers,
|
|
88
89
|
});
|
|
@@ -100,7 +101,7 @@ class Spider {
|
|
|
100
101
|
* @returns {Promise<any>} The scraped data from the URL.
|
|
101
102
|
*/
|
|
102
103
|
async scrapeUrl(url, params = {}) {
|
|
103
|
-
return this._apiPost(
|
|
104
|
+
return this._apiPost(config_1.APIRoutes.Crawl, { url: url, limit: 1, ...params });
|
|
104
105
|
}
|
|
105
106
|
/**
|
|
106
107
|
* Initiates a crawling job starting from the specified URL.
|
|
@@ -112,7 +113,7 @@ class Spider {
|
|
|
112
113
|
*/
|
|
113
114
|
async crawlUrl(url, params = {}, stream = false, cb) {
|
|
114
115
|
const jsonl = stream && cb;
|
|
115
|
-
const res = await this._apiPost(
|
|
116
|
+
const res = await this._apiPost(config_1.APIRoutes.Crawl, { url: url, ...params }, stream, !!jsonl);
|
|
116
117
|
if (jsonl) {
|
|
117
118
|
return await (0, stream_reader_1.streamReader)(res, cb);
|
|
118
119
|
}
|
|
@@ -125,7 +126,7 @@ class Spider {
|
|
|
125
126
|
* @returns {Promise<any>} A list of links extracted from the URL.
|
|
126
127
|
*/
|
|
127
128
|
async links(url, params = {}) {
|
|
128
|
-
return this._apiPost(
|
|
129
|
+
return this._apiPost(config_1.APIRoutes.Links, { url: url, ...params });
|
|
129
130
|
}
|
|
130
131
|
/**
|
|
131
132
|
* Takes a screenshot of the website starting from this URL.
|
|
@@ -134,7 +135,7 @@ class Spider {
|
|
|
134
135
|
* @returns {Promise<any>} The screenshot data.
|
|
135
136
|
*/
|
|
136
137
|
async screenshot(url, params = {}) {
|
|
137
|
-
return this._apiPost(
|
|
138
|
+
return this._apiPost(config_1.APIRoutes.Screenshot, { url: url, ...params });
|
|
138
139
|
}
|
|
139
140
|
/**
|
|
140
141
|
* Perform a search and gather a list of websites to start crawling and collect resources.
|
|
@@ -143,7 +144,7 @@ class Spider {
|
|
|
143
144
|
* @returns {Promise<any>} The result of the crawl, either structured data or a Response object if streaming.
|
|
144
145
|
*/
|
|
145
146
|
async search(q, params = {}) {
|
|
146
|
-
return this._apiPost(
|
|
147
|
+
return this._apiPost(config_1.APIRoutes.Search, { search: q, ...params });
|
|
147
148
|
}
|
|
148
149
|
/**
|
|
149
150
|
* Transform HTML to Markdown or text. You can send up to 10MB of data at once.
|
|
@@ -152,16 +153,19 @@ class Spider {
|
|
|
152
153
|
* @returns {Promise<any>} The transformation result.
|
|
153
154
|
*/
|
|
154
155
|
async transform(data, params = {}) {
|
|
155
|
-
return this._apiPost(
|
|
156
|
+
return this._apiPost(config_1.APIRoutes.Transform, { data, ...params });
|
|
156
157
|
}
|
|
157
158
|
/**
|
|
158
|
-
* Extracts
|
|
159
|
+
* Extracts leads from a website.
|
|
159
160
|
* @param {string} url - The URL from which to extract contacts.
|
|
160
161
|
* @param {GenericParams} [params={}] - Configuration parameters for the extraction.
|
|
161
162
|
* @returns {Promise<any>} The contact information extracted.
|
|
162
163
|
*/
|
|
163
164
|
async extractContacts(url, params = {}) {
|
|
164
|
-
return this._apiPost(
|
|
165
|
+
return this._apiPost(config_1.APIRoutes.PiplineExtractLeads, {
|
|
166
|
+
url: url,
|
|
167
|
+
...params,
|
|
168
|
+
});
|
|
165
169
|
}
|
|
166
170
|
/**
|
|
167
171
|
* Applies labeling to data extracted from a specified URL.
|
|
@@ -170,7 +174,7 @@ class Spider {
|
|
|
170
174
|
* @returns {Promise<any>} The labeled data.
|
|
171
175
|
*/
|
|
172
176
|
async label(url, params = {}) {
|
|
173
|
-
return this._apiPost(
|
|
177
|
+
return this._apiPost(config_1.APIRoutes.PiplineLabel, { url: url, ...params });
|
|
174
178
|
}
|
|
175
179
|
/**
|
|
176
180
|
* Check the crawl state of the website.
|
|
@@ -179,7 +183,7 @@ class Spider {
|
|
|
179
183
|
* @returns {Promise<any>} The crawl state data.
|
|
180
184
|
*/
|
|
181
185
|
async getCrawlState(url, params = {}) {
|
|
182
|
-
return this._apiPost(
|
|
186
|
+
return this._apiPost(config_1.APIRoutes.DataCrawlState, { url: url, ...params });
|
|
183
187
|
}
|
|
184
188
|
/**
|
|
185
189
|
* Create a signed url to download files from the storage.
|
|
@@ -199,7 +203,7 @@ class Spider {
|
|
|
199
203
|
...(limit && { limit: limit.toString() }),
|
|
200
204
|
...(expiresIn && { expiresIn: expiresIn.toString() }),
|
|
201
205
|
});
|
|
202
|
-
const endpoint = `${exports.
|
|
206
|
+
const endpoint = `${exports.baseUrl}/${config_1.APIRoutes.DataSignUrl}?${params.toString()}`;
|
|
203
207
|
const headers = this.prepareHeaders;
|
|
204
208
|
const response = await fetch(endpoint, {
|
|
205
209
|
method: "GET",
|
|
@@ -209,7 +213,7 @@ class Spider {
|
|
|
209
213
|
return await response.json();
|
|
210
214
|
}
|
|
211
215
|
else {
|
|
212
|
-
this.handleError(response, `Failed to
|
|
216
|
+
this.handleError(response, `Failed to sign files`);
|
|
213
217
|
}
|
|
214
218
|
}
|
|
215
219
|
/**
|
|
@@ -225,8 +229,8 @@ class Spider {
|
|
|
225
229
|
* @param {object} data - The data to be inserted.
|
|
226
230
|
* @returns {Promise<any>} The response from the server.
|
|
227
231
|
*/
|
|
228
|
-
async postData(
|
|
229
|
-
return this._apiPost(
|
|
232
|
+
async postData(collection, data) {
|
|
233
|
+
return this._apiPost(`${config_1.APIRoutes.Data}/${collection}`, data);
|
|
230
234
|
}
|
|
231
235
|
/**
|
|
232
236
|
* Send a GET request to retrieve data from a specified table.
|
|
@@ -235,7 +239,7 @@ class Spider {
|
|
|
235
239
|
* @returns {Promise<any>} The response from the server.
|
|
236
240
|
*/
|
|
237
241
|
async getData(collections, params) {
|
|
238
|
-
return this._apiGet(
|
|
242
|
+
return this._apiGet(`${config_1.APIRoutes.Data}/${collections}?${new URLSearchParams(params).toString()}`);
|
|
239
243
|
}
|
|
240
244
|
/**
|
|
241
245
|
* Download a record. The url is the path of the storage hash returned and not the exact website url.
|
|
@@ -244,8 +248,8 @@ class Spider {
|
|
|
244
248
|
*/
|
|
245
249
|
async download(query, output) {
|
|
246
250
|
const headers = this.prepareHeaders;
|
|
247
|
-
const endpoint =
|
|
248
|
-
const response = await fetch(`${exports.
|
|
251
|
+
const endpoint = `${config_1.APIRoutes.DataDownload}?${new URLSearchParams(query).toString()}`;
|
|
252
|
+
const response = await fetch(`${exports.baseUrl}/v1/${endpoint}`, {
|
|
249
253
|
method: "GET",
|
|
250
254
|
headers,
|
|
251
255
|
});
|
|
@@ -265,7 +269,7 @@ class Spider {
|
|
|
265
269
|
* @returns {Promise<any>} The response from the server.
|
|
266
270
|
*/
|
|
267
271
|
async query(query) {
|
|
268
|
-
return this._apiGet(
|
|
272
|
+
return this._apiGet(`${config_1.APIRoutes.DataQuery}?${new URLSearchParams(query).toString()}`);
|
|
269
273
|
}
|
|
270
274
|
/**
|
|
271
275
|
* Send a DELETE request to remove data from a specified table.
|
|
@@ -274,7 +278,7 @@ class Spider {
|
|
|
274
278
|
* @returns {Promise<any>} The response from the server.
|
|
275
279
|
*/
|
|
276
280
|
async deleteData(collection, params) {
|
|
277
|
-
return this._apiDelete(
|
|
281
|
+
return this._apiDelete(`${config_1.APIRoutes.Data}/${collection}?${new URLSearchParams(params).toString()}`);
|
|
278
282
|
}
|
|
279
283
|
/**
|
|
280
284
|
* Prepares common headers for each API request.
|
package/dist/config.d.ts
CHANGED
|
@@ -272,4 +272,36 @@ export declare enum Collection {
|
|
|
272
272
|
Webhooks = "webhooks",
|
|
273
273
|
APIKeys = "api_keys"
|
|
274
274
|
}
|
|
275
|
+
declare enum ApiVersion {
|
|
276
|
+
V1 = "v1"
|
|
277
|
+
}
|
|
278
|
+
export declare enum APIRoutes {
|
|
279
|
+
Crawl = "crawl",
|
|
280
|
+
Links = "links",
|
|
281
|
+
Screenshot = "screenshot",
|
|
282
|
+
Search = "search",
|
|
283
|
+
Transform = "transform",
|
|
284
|
+
PiplineExtractLeads = "pipeline/extract-contacts",
|
|
285
|
+
PiplineLabel = "pipeline/label",
|
|
286
|
+
Data = "data",
|
|
287
|
+
DataCrawlState = "data/crawl_state",
|
|
288
|
+
DataSignUrl = "data/sign-url",
|
|
289
|
+
DataDownload = "data/download",
|
|
290
|
+
DataQuery = "data/query"
|
|
291
|
+
}
|
|
292
|
+
export declare const APISchema: {
|
|
293
|
+
url: string;
|
|
294
|
+
versions: {
|
|
295
|
+
current: ApiVersion;
|
|
296
|
+
v1: {
|
|
297
|
+
routes: typeof APIRoutes;
|
|
298
|
+
end_date: string;
|
|
299
|
+
};
|
|
300
|
+
latest: {
|
|
301
|
+
routes: typeof APIRoutes;
|
|
302
|
+
end_date: string;
|
|
303
|
+
};
|
|
304
|
+
};
|
|
305
|
+
};
|
|
306
|
+
export declare const setBaseUrl: (url: string) => void;
|
|
275
307
|
export {};
|
package/dist/config.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.Collection = void 0;
|
|
3
|
+
exports.setBaseUrl = exports.APISchema = exports.APIRoutes = exports.Collection = void 0;
|
|
4
4
|
// records that you can query
|
|
5
5
|
var Collection;
|
|
6
6
|
(function (Collection) {
|
|
@@ -16,3 +16,58 @@ var Collection;
|
|
|
16
16
|
Collection["Webhooks"] = "webhooks";
|
|
17
17
|
Collection["APIKeys"] = "api_keys";
|
|
18
18
|
})(Collection || (exports.Collection = Collection = {}));
|
|
19
|
+
// The API version for Spider
|
|
20
|
+
var ApiVersion;
|
|
21
|
+
(function (ApiVersion) {
|
|
22
|
+
ApiVersion["V1"] = "v1";
|
|
23
|
+
})(ApiVersion || (ApiVersion = {}));
|
|
24
|
+
// The API routes paths.
|
|
25
|
+
var APIRoutes;
|
|
26
|
+
(function (APIRoutes) {
|
|
27
|
+
// Crawl a website to collect the contents. Can be one page or many.
|
|
28
|
+
APIRoutes["Crawl"] = "crawl";
|
|
29
|
+
// Crawl a website to collect the links. Can be one page or many.
|
|
30
|
+
APIRoutes["Links"] = "links";
|
|
31
|
+
// Crawl a website to collect screenshots. Can be one page or many.
|
|
32
|
+
APIRoutes["Screenshot"] = "screenshot";
|
|
33
|
+
// Search for something and optionally crawl the pages or get the results of the search.
|
|
34
|
+
APIRoutes["Search"] = "search";
|
|
35
|
+
// Transform HTML to markdown or text.
|
|
36
|
+
APIRoutes["Transform"] = "transform";
|
|
37
|
+
// Pipeline extract leads for a website - emails, phones, etc.
|
|
38
|
+
APIRoutes["PiplineExtractLeads"] = "pipeline/extract-contacts";
|
|
39
|
+
// Pipeline label a website by category using AI and metadata.
|
|
40
|
+
APIRoutes["PiplineLabel"] = "pipeline/label";
|
|
41
|
+
// Dynamic collection routes.
|
|
42
|
+
APIRoutes["Data"] = "data";
|
|
43
|
+
// The last crawl state of a website.
|
|
44
|
+
APIRoutes["DataCrawlState"] = "data/crawl_state";
|
|
45
|
+
// Sign a file from storage based on the exact url path of the storage or domain - pathname.
|
|
46
|
+
APIRoutes["DataSignUrl"] = "data/sign-url";
|
|
47
|
+
// Download a file from storage based on the exact url path of the storage or domain - pathname.
|
|
48
|
+
APIRoutes["DataDownload"] = "data/download";
|
|
49
|
+
// Perform a query on the global database to grab content without crawling if available.
|
|
50
|
+
APIRoutes["DataQuery"] = "data/query";
|
|
51
|
+
})(APIRoutes || (exports.APIRoutes = APIRoutes = {}));
|
|
52
|
+
// The base API target info for Spider Cloud.
|
|
53
|
+
exports.APISchema = {
|
|
54
|
+
url: "https://api.spider.cloud",
|
|
55
|
+
versions: {
|
|
56
|
+
current: ApiVersion.V1,
|
|
57
|
+
v1: {
|
|
58
|
+
routes: APIRoutes,
|
|
59
|
+
end_date: "",
|
|
60
|
+
},
|
|
61
|
+
latest: {
|
|
62
|
+
routes: APIRoutes,
|
|
63
|
+
end_date: "",
|
|
64
|
+
},
|
|
65
|
+
},
|
|
66
|
+
};
|
|
67
|
+
// Adjust the Spider Cloud endpoint.
|
|
68
|
+
const setBaseUrl = (url) => {
|
|
69
|
+
if (url) {
|
|
70
|
+
exports.APISchema["url"] = url;
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
exports.setBaseUrl = setBaseUrl;
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.Collection = exports.Spider = void 0;
|
|
3
|
+
exports.APISchema = exports.setBaseUrl = exports.Collection = exports.Spider = void 0;
|
|
4
4
|
var client_1 = require("./client");
|
|
5
5
|
Object.defineProperty(exports, "Spider", { enumerable: true, get: function () { return client_1.Spider; } });
|
|
6
6
|
var config_1 = require("./config");
|
|
7
7
|
Object.defineProperty(exports, "Collection", { enumerable: true, get: function () { return config_1.Collection; } });
|
|
8
|
+
Object.defineProperty(exports, "setBaseUrl", { enumerable: true, get: function () { return config_1.setBaseUrl; } });
|
|
9
|
+
Object.defineProperty(exports, "APISchema", { enumerable: true, get: function () { return config_1.APISchema; } });
|