@spider-cloud/spider-client 0.0.59 → 0.0.61
This diff reflects the publicly available contents of the two package versions as published to their registry, and is provided for informational purposes only.
- package/dist/client.d.ts +2 -3
- package/dist/client.js +27 -24
- package/dist/config.d.ts +32 -0
- package/dist/config.js +56 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +3 -1
- package/package.json +1 -1
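The consumer-facing theme of the release: the hard-coded BASE_API_URL export is gone, and the endpoint, API version, and route strings now live in the config module as APISchema, ApiVersion, and APIRoutes, alongside a setBaseUrl helper for retargeting the client. A minimal migration sketch in TypeScript (only names shown in the diffs below are real; the commented-out old import path is illustrative):

// Before (0.0.59): a fixed exported constant, removed in 0.0.61.
// import { BASE_API_URL } from "@spider-cloud/spider-client/dist/client";

// After (0.0.61): the base endpoint lives on the mutable APISchema object.
import { APISchema } from "@spider-cloud/spider-client";

const base: string = APISchema.url; // "https://api.spider.cloud" by default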
package/dist/client.d.ts
CHANGED
@@ -1,5 +1,4 @@
 import { ChunkCallbackFunction, Collection, QueryRequest, SpiderCoreResponse, SpiderParams } from "./config";
-export declare const BASE_API_URL = "https://api.spider.cloud";
 /**
  * Generic params for core request.
  */
@@ -97,7 +96,7 @@ export declare class Spider {
         url?: string;
     }[], params?: {}): Promise<any>;
     /**
-     * Extracts
+     * Extracts leads from a website.
      * @param {string} url - The URL from which to extract contacts.
      * @param {GenericParams} [params={}] - Configuration parameters for the extraction.
      * @returns {Promise<any>} The contact information extracted.
@@ -143,7 +142,7 @@ export declare class Spider {
      * @param {object} data - The data to be inserted.
      * @returns {Promise<any>} The response from the server.
      */
-    postData(
+    postData(collection: Collection, data: GenericParams | Record<string, any>): Promise<any>;
     /**
      * Send a GET request to retrieve data from a specified table.
      * @param {Collection} table - The table name in the database.
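Beyond the doc-comment fix, the notable typing change here is postData: the target collection is now an explicit, enum-typed first argument. A hedged usage sketch (the constructor options and payload are assumptions, not part of this diff; Collection.Webhooks is one of the enum members visible in the config.d.ts diff below):

import { Spider, Collection } from "@spider-cloud/spider-client";

const spider = new Spider({ apiKey: "sk-..." }); // assumed option shape, placeholder key

// Inside an async context: insert an illustrative record into the webhooks collection.
await spider.postData(Collection.Webhooks, { url: "https://example.com" });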
package/dist/client.js
CHANGED
@@ -1,10 +1,10 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.Spider =
+exports.Spider = void 0;
+const config_1 = require("./config");
 const package_json_1 = require("../package.json");
 const supabase_1 = require("./supabase");
 const stream_reader_1 = require("./utils/stream-reader");
-exports.BASE_API_URL = "https://api.spider.cloud";
 /**
  * A class to interact with the Spider API.
  */
@@ -42,7 +42,7 @@ class Spider {
      */
     async _apiPost(endpoint, data, stream, jsonl) {
         const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
-        const response = await fetch(`${
+        const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
             method: "POST",
             headers: headers,
             body: JSON.stringify(data),
@@ -64,7 +64,7 @@ class Spider {
      */
     async _apiGet(endpoint) {
         const headers = this.prepareHeaders;
-        const response = await fetch(`${
+        const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
             method: "GET",
             headers: headers,
         });
@@ -82,7 +82,7 @@ class Spider {
      */
     async _apiDelete(endpoint) {
         const headers = this.prepareHeaders;
-        const response = await fetch(`${
+        const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
             method: "DELETE",
             headers,
         });
@@ -100,7 +100,7 @@ class Spider {
      * @returns {Promise<any>} The scraped data from the URL.
      */
     async scrapeUrl(url, params = {}) {
-        return this._apiPost(
+        return this._apiPost(config_1.APIRoutes.Crawl, { url: url, limit: 1, ...params });
     }
     /**
      * Initiates a crawling job starting from the specified URL.
@@ -112,7 +112,7 @@ class Spider {
      */
     async crawlUrl(url, params = {}, stream = false, cb) {
         const jsonl = stream && cb;
-        const res = await this._apiPost(
+        const res = await this._apiPost(config_1.APIRoutes.Crawl, { url: url, ...params }, stream, !!jsonl);
         if (jsonl) {
             return await (0, stream_reader_1.streamReader)(res, cb);
         }
@@ -125,7 +125,7 @@ class Spider {
      * @returns {Promise<any>} A list of links extracted from the URL.
      */
     async links(url, params = {}) {
-        return this._apiPost(
+        return this._apiPost(config_1.APIRoutes.Links, { url: url, ...params });
     }
     /**
      * Takes a screenshot of the website starting from this URL.
@@ -134,7 +134,7 @@ class Spider {
      * @returns {Promise<any>} The screenshot data.
      */
     async screenshot(url, params = {}) {
-        return this._apiPost(
+        return this._apiPost(config_1.APIRoutes.Screenshot, { url: url, ...params });
     }
     /**
      * Perform a search and gather a list of websites to start crawling and collect resources.
@@ -143,7 +143,7 @@ class Spider {
      * @returns {Promise<any>} The result of the crawl, either structured data or a Response object if streaming.
      */
     async search(q, params = {}) {
-        return this._apiPost(
+        return this._apiPost(config_1.APIRoutes.Search, { search: q, ...params });
     }
     /**
      * Transform HTML to Markdown or text. You can send up to 10MB of data at once.
@@ -152,16 +152,19 @@ class Spider {
      * @returns {Promise<any>} The transformation result.
      */
     async transform(data, params = {}) {
-        return this._apiPost(
+        return this._apiPost(config_1.APIRoutes.Transform, { data, ...params });
     }
     /**
-     * Extracts
+     * Extracts leads from a website.
      * @param {string} url - The URL from which to extract contacts.
      * @param {GenericParams} [params={}] - Configuration parameters for the extraction.
      * @returns {Promise<any>} The contact information extracted.
      */
     async extractContacts(url, params = {}) {
-        return this._apiPost(
+        return this._apiPost(config_1.APIRoutes.PiplineExtractLeads, {
+            url: url,
+            ...params,
+        });
     }
     /**
      * Applies labeling to data extracted from a specified URL.
@@ -170,7 +173,7 @@ class Spider {
      * @returns {Promise<any>} The labeled data.
      */
     async label(url, params = {}) {
-        return this._apiPost(
+        return this._apiPost(config_1.APIRoutes.PiplineLabel, { url: url, ...params });
     }
     /**
      * Check the crawl state of the website.
@@ -179,7 +182,7 @@ class Spider {
      * @returns {Promise<any>} The crawl state data.
      */
     async getCrawlState(url, params = {}) {
-        return this._apiPost(
+        return this._apiPost(config_1.APIRoutes.DataCrawlState, { url: url, ...params });
     }
     /**
      * Create a signed url to download files from the storage.
@@ -199,7 +202,7 @@ class Spider {
             ...(limit && { limit: limit.toString() }),
             ...(expiresIn && { expiresIn: expiresIn.toString() }),
         });
-        const endpoint = `${
+        const endpoint = `${config_1.APISchema["url"]}/${config_1.APIRoutes.DataSignUrl}?${params.toString()}`;
         const headers = this.prepareHeaders;
         const response = await fetch(endpoint, {
             method: "GET",
@@ -209,7 +212,7 @@ class Spider {
             return await response.json();
         }
         else {
-            this.handleError(response, `Failed to
+            this.handleError(response, `Failed to sign files`);
         }
     }
     /**
@@ -225,8 +228,8 @@ class Spider {
      * @param {object} data - The data to be inserted.
      * @returns {Promise<any>} The response from the server.
      */
-    async postData(
-        return this._apiPost(
+    async postData(collection, data) {
+        return this._apiPost(`${config_1.APIRoutes.Data}/${collection}`, data);
     }
     /**
      * Send a GET request to retrieve data from a specified table.
@@ -235,7 +238,7 @@ class Spider {
      * @returns {Promise<any>} The response from the server.
      */
     async getData(collections, params) {
-        return this._apiGet(
+        return this._apiGet(`${config_1.APIRoutes.Data}/${collections}?${new URLSearchParams(params).toString()}`);
     }
     /**
      * Download a record. The url is the path of the storage hash returned and not the exact website url.
@@ -244,8 +247,8 @@ class Spider {
      */
     async download(query, output) {
         const headers = this.prepareHeaders;
-        const endpoint =
-        const response = await fetch(`${
+        const endpoint = `${config_1.APIRoutes.DataDownload}?${new URLSearchParams(query).toString()}`;
+        const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
             method: "GET",
             headers,
         });
@@ -265,7 +268,7 @@ class Spider {
      * @returns {Promise<any>} The response from the server.
      */
     async query(query) {
-        return this._apiGet(
+        return this._apiGet(`${config_1.APIRoutes.DataQuery}?${new URLSearchParams(query).toString()}`);
     }
     /**
      * Send a DELETE request to remove data from a specified table.
@@ -274,7 +277,7 @@ class Spider {
      * @returns {Promise<any>} The response from the server.
      */
     async deleteData(collection, params) {
-        return this._apiDelete(
+        return this._apiDelete(`${config_1.APIRoutes.Data}/${collection}?${new URLSearchParams(params).toString()}`);
     }
     /**
      * Prepares common headers for each API request.
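Every request path above now follows one template: APISchema.url, then the version segment, then the route. A small sketch of that composition, using only names introduced in this diff (the deep import path and the helper itself are assumptions for illustration):

import { APISchema, APIRoutes, ApiVersion } from "@spider-cloud/spider-client/dist/config";

// Hypothetical helper mirroring what _apiPost/_apiGet/_apiDelete inline.
const endpointFor = (route: APIRoutes): string =>
    `${APISchema.url}/${ApiVersion.V1}/${route}`;

endpointFor(APIRoutes.Crawl);     // "https://api.spider.cloud/v1/crawl"
endpointFor(APIRoutes.DataQuery); // "https://api.spider.cloud/v1/data/query"

One exception worth noting: per the -199,7 +202,7 hunk, createSignedUrl builds its endpoint from APISchema.url and APIRoutes.DataSignUrl with no version segment.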
package/dist/config.d.ts
CHANGED
@@ -272,4 +272,36 @@ export declare enum Collection {
     Webhooks = "webhooks",
     APIKeys = "api_keys"
 }
+export declare enum ApiVersion {
+    V1 = "v1"
+}
+export declare enum APIRoutes {
+    Crawl = "crawl",
+    Links = "links",
+    Screenshot = "screenshot",
+    Search = "search",
+    Transform = "transform",
+    PiplineExtractLeads = "pipeline/extract-contacts",
+    PiplineLabel = "pipeline/label",
+    Data = "data",
+    DataCrawlState = "data/crawl_state",
+    DataSignUrl = "data/sign-url",
+    DataDownload = "data/download",
+    DataQuery = "data/query"
+}
+export declare const APISchema: {
+    url: string;
+    versions: {
+        current: ApiVersion;
+        v1: {
+            routes: typeof APIRoutes;
+            end_date: string;
+        };
+        latest: {
+            routes: typeof APIRoutes;
+            end_date: string;
+        };
+    };
+};
+export declare const setBaseUrl: (url: string) => void;
 export {};
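The APISchema declaration also carries a versions table: each version tag maps to its route enum plus an end_date deprecation marker (empty strings in this release). A type-level sketch of reading it, with the same assumed deep import as above:

import { APISchema, ApiVersion } from "@spider-cloud/spider-client/dist/config";

// The schema marks which version is current; both entries point at the same
// APIRoutes enum today, and end_date stays empty until a version is sunset.
const tag: ApiVersion = APISchema.versions.current;     // ApiVersion.V1
const sunset: string = APISchema.versions.v1.end_date;  // "" for now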
package/dist/config.js
CHANGED
@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.Collection = void 0;
+exports.setBaseUrl = exports.APISchema = exports.APIRoutes = exports.ApiVersion = exports.Collection = void 0;
 // records that you can query
 var Collection;
 (function (Collection) {
@@ -16,3 +16,58 @@ var Collection;
     Collection["Webhooks"] = "webhooks";
     Collection["APIKeys"] = "api_keys";
 })(Collection || (exports.Collection = Collection = {}));
+// The API version for Spider
+var ApiVersion;
+(function (ApiVersion) {
+    ApiVersion["V1"] = "v1";
+})(ApiVersion || (exports.ApiVersion = ApiVersion = {}));
+// The API routes paths.
+var APIRoutes;
+(function (APIRoutes) {
+    // Crawl a website to collect the contents. Can be one page or many.
+    APIRoutes["Crawl"] = "crawl";
+    // Crawl a website to collect the links. Can be one page or many.
+    APIRoutes["Links"] = "links";
+    // Crawl a website to collect screenshots. Can be one page or many.
+    APIRoutes["Screenshot"] = "screenshot";
+    // Search for something and optionally crawl the pages or get the results of the search.
+    APIRoutes["Search"] = "search";
+    // Transform HTML to markdown or text.
+    APIRoutes["Transform"] = "transform";
+    // Pipeline extract leads for a website - emails, phones, etc.
+    APIRoutes["PiplineExtractLeads"] = "pipeline/extract-contacts";
+    // Pipeline label a website by category using AI and metadata.
+    APIRoutes["PiplineLabel"] = "pipeline/label";
+    // Dynamic collection routes.
+    APIRoutes["Data"] = "data";
+    // The last crawl state of a website.
+    APIRoutes["DataCrawlState"] = "data/crawl_state";
+    // Sign a file from storage based on the exact url path of the storage or domain - pathname.
+    APIRoutes["DataSignUrl"] = "data/sign-url";
+    // Download a file from storage based on the exact url path of the storage or domain - pathname.
+    APIRoutes["DataDownload"] = "data/download";
+    // Perform a query on the global database to grab content without crawling if available.
+    APIRoutes["DataQuery"] = "data/query";
+})(APIRoutes || (exports.APIRoutes = APIRoutes = {}));
+// The base API target info for Spider Cloud.
+exports.APISchema = {
+    url: "https://api.spider.cloud",
+    versions: {
+        current: ApiVersion.V1,
+        v1: {
+            routes: APIRoutes,
+            end_date: "",
+        },
+        latest: {
+            routes: APIRoutes,
+            end_date: "",
+        },
+    },
+};
+// Adjust the Spider Cloud endpoint.
+const setBaseUrl = (url) => {
+    if (url) {
+        exports.APISchema["url"] = url;
+    }
+};
+exports.setBaseUrl = setBaseUrl;
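Because setBaseUrl mutates the shared APISchema object in place, the override is process-global: every Spider instance picks up the new host on its next request, and falsy input is ignored by the guard. A hedged sketch (the self-hosted URL is a placeholder):

import { setBaseUrl, APISchema } from "@spider-cloud/spider-client";

setBaseUrl("http://localhost:8080"); // e.g. a self-hosted or proxy endpoint (placeholder)
console.log(APISchema.url);          // "http://localhost:8080"
setBaseUrl("");                      // falsy: the if (url) guard leaves the value alone
console.log(APISchema.url);          // still "http://localhost:8080"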
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
@@ -1,7 +1,9 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.Collection = exports.Spider = void 0;
+exports.APISchema = exports.setBaseUrl = exports.Collection = exports.Spider = void 0;
 var client_1 = require("./client");
 Object.defineProperty(exports, "Spider", { enumerable: true, get: function () { return client_1.Spider; } });
 var config_1 = require("./config");
 Object.defineProperty(exports, "Collection", { enumerable: true, get: function () { return config_1.Collection; } });
+Object.defineProperty(exports, "setBaseUrl", { enumerable: true, get: function () { return config_1.setBaseUrl; } });
+Object.defineProperty(exports, "APISchema", { enumerable: true, get: function () { return config_1.APISchema; } });
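Taken together, the package root now re-exports four names. A short end-to-end sketch of the 0.0.61 surface (the constructor option shape and the environment variable are assumptions, not shown in this diff):

import { Spider, Collection, setBaseUrl, APISchema } from "@spider-cloud/spider-client";

async function main() {
    // Assumed option shape; the Spider constructor is unchanged by this diff.
    const spider = new Spider({ apiKey: process.env.SPIDER_API_KEY });
    // POSTs to `${APISchema.url}/v1/crawl` with limit: 1, per the new scrapeUrl body.
    const page = await spider.scrapeUrl("https://example.com");
    console.log(page);
}

main();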