@spider-cloud/spider-client 0.0.59 → 0.0.61

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/client.d.ts CHANGED
@@ -1,5 +1,4 @@
1
1
  import { ChunkCallbackFunction, Collection, QueryRequest, SpiderCoreResponse, SpiderParams } from "./config";
2
- export declare const BASE_API_URL = "https://api.spider.cloud";
3
2
  /**
4
3
  * Generic params for core request.
5
4
  */
@@ -97,7 +96,7 @@ export declare class Spider {
97
96
  url?: string;
98
97
  }[], params?: {}): Promise<any>;
99
98
  /**
100
- * Extracts contact information from the specified URL.
99
+ * Extracts leads from a website.
101
100
  * @param {string} url - The URL from which to extract contacts.
102
101
  * @param {GenericParams} [params={}] - Configuration parameters for the extraction.
103
102
  * @returns {Promise<any>} The contact information extracted.
@@ -143,7 +142,7 @@ export declare class Spider {
143
142
  * @param {object} data - The data to be inserted.
144
143
  * @returns {Promise<any>} The response from the server.
145
144
  */
146
- postData(table: string, data: GenericParams | Record<string, any>): Promise<any>;
145
+ postData(collection: Collection, data: GenericParams | Record<string, any>): Promise<any>;
147
146
  /**
148
147
  * Send a GET request to retrieve data from a specified table.
149
148
  * @param {Collection} table - The table name in the database.
package/dist/client.js CHANGED
@@ -1,10 +1,10 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Spider = exports.BASE_API_URL = void 0;
3
+ exports.Spider = void 0;
4
+ const config_1 = require("./config");
4
5
  const package_json_1 = require("../package.json");
5
6
  const supabase_1 = require("./supabase");
6
7
  const stream_reader_1 = require("./utils/stream-reader");
7
- exports.BASE_API_URL = "https://api.spider.cloud";
8
8
  /**
9
9
  * A class to interact with the Spider API.
10
10
  */
@@ -42,7 +42,7 @@ class Spider {
42
42
  */
43
43
  async _apiPost(endpoint, data, stream, jsonl) {
44
44
  const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
45
- const response = await fetch(`${exports.BASE_API_URL}/v1/${endpoint}`, {
45
+ const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
46
46
  method: "POST",
47
47
  headers: headers,
48
48
  body: JSON.stringify(data),
@@ -64,7 +64,7 @@ class Spider {
64
64
  */
65
65
  async _apiGet(endpoint) {
66
66
  const headers = this.prepareHeaders;
67
- const response = await fetch(`${exports.BASE_API_URL}/v1/${endpoint}`, {
67
+ const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
68
68
  method: "GET",
69
69
  headers: headers,
70
70
  });
@@ -82,7 +82,7 @@ class Spider {
82
82
  */
83
83
  async _apiDelete(endpoint) {
84
84
  const headers = this.prepareHeaders;
85
- const response = await fetch(`${exports.BASE_API_URL}/v1/${endpoint}`, {
85
+ const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
86
86
  method: "DELETE",
87
87
  headers,
88
88
  });
@@ -100,7 +100,7 @@ class Spider {
100
100
  * @returns {Promise<any>} The scraped data from the URL.
101
101
  */
102
102
  async scrapeUrl(url, params = {}) {
103
- return this._apiPost("crawl", { url: url, limit: 1, ...params });
103
+ return this._apiPost(config_1.APIRoutes.Crawl, { url: url, limit: 1, ...params });
104
104
  }
105
105
  /**
106
106
  * Initiates a crawling job starting from the specified URL.
@@ -112,7 +112,7 @@ class Spider {
112
112
  */
113
113
  async crawlUrl(url, params = {}, stream = false, cb) {
114
114
  const jsonl = stream && cb;
115
- const res = await this._apiPost("crawl", { url: url, ...params }, stream, !!jsonl);
115
+ const res = await this._apiPost(config_1.APIRoutes.Crawl, { url: url, ...params }, stream, !!jsonl);
116
116
  if (jsonl) {
117
117
  return await (0, stream_reader_1.streamReader)(res, cb);
118
118
  }
@@ -125,7 +125,7 @@ class Spider {
125
125
  * @returns {Promise<any>} A list of links extracted from the URL.
126
126
  */
127
127
  async links(url, params = {}) {
128
- return this._apiPost("links", { url: url, ...params });
128
+ return this._apiPost(config_1.APIRoutes.Links, { url: url, ...params });
129
129
  }
130
130
  /**
131
131
  * Takes a screenshot of the website starting from this URL.
@@ -134,7 +134,7 @@ class Spider {
134
134
  * @returns {Promise<any>} The screenshot data.
135
135
  */
136
136
  async screenshot(url, params = {}) {
137
- return this._apiPost("screenshot", { url: url, ...params });
137
+ return this._apiPost(config_1.APIRoutes.Screenshot, { url: url, ...params });
138
138
  }
139
139
  /**
140
140
  * Perform a search and gather a list of websites to start crawling and collect resources.
@@ -143,7 +143,7 @@ class Spider {
143
143
  * @returns {Promise<any>} The result of the crawl, either structured data or a Response object if streaming.
144
144
  */
145
145
  async search(q, params = {}) {
146
- return this._apiPost("search", { search: q, ...params });
146
+ return this._apiPost(config_1.APIRoutes.Search, { search: q, ...params });
147
147
  }
148
148
  /**
149
149
  * Transform HTML to Markdown or text. You can send up to 10MB of data at once.
@@ -152,16 +152,19 @@ class Spider {
152
152
  * @returns {Promise<any>} The transformation result.
153
153
  */
154
154
  async transform(data, params = {}) {
155
- return this._apiPost("transform", { data, ...params });
155
+ return this._apiPost(config_1.APIRoutes.Transform, { data, ...params });
156
156
  }
157
157
  /**
158
- * Extracts contact information from the specified URL.
158
+ * Extracts leads from a website.
159
159
  * @param {string} url - The URL from which to extract contacts.
160
160
  * @param {GenericParams} [params={}] - Configuration parameters for the extraction.
161
161
  * @returns {Promise<any>} The contact information extracted.
162
162
  */
163
163
  async extractContacts(url, params = {}) {
164
- return this._apiPost("pipeline/extract-contacts", { url: url, ...params });
164
+ return this._apiPost(config_1.APIRoutes.PiplineExtractLeads, {
165
+ url: url,
166
+ ...params,
167
+ });
165
168
  }
166
169
  /**
167
170
  * Applies labeling to data extracted from a specified URL.
@@ -170,7 +173,7 @@ class Spider {
170
173
  * @returns {Promise<any>} The labeled data.
171
174
  */
172
175
  async label(url, params = {}) {
173
- return this._apiPost("pipeline/label", { url: url, ...params });
176
+ return this._apiPost(config_1.APIRoutes.PiplineLabel, { url: url, ...params });
174
177
  }
175
178
  /**
176
179
  * Check the crawl state of the website.
@@ -179,7 +182,7 @@ class Spider {
179
182
  * @returns {Promise<any>} The crawl state data.
180
183
  */
181
184
  async getCrawlState(url, params = {}) {
182
- return this._apiPost("data/crawl_state", { url: url, ...params });
185
+ return this._apiPost(config_1.APIRoutes.DataCrawlState, { url: url, ...params });
183
186
  }
184
187
  /**
185
188
  * Create a signed url to download files from the storage.
@@ -199,7 +202,7 @@ class Spider {
199
202
  ...(limit && { limit: limit.toString() }),
200
203
  ...(expiresIn && { expiresIn: expiresIn.toString() }),
201
204
  });
202
- const endpoint = `${exports.BASE_API_URL}/data/sign-url?${params.toString()}`;
205
+ const endpoint = `${config_1.APISchema["url"]}/${config_1.APIRoutes.DataSignUrl}?${params.toString()}`;
203
206
  const headers = this.prepareHeaders;
204
207
  const response = await fetch(endpoint, {
205
208
  method: "GET",
@@ -209,7 +212,7 @@ class Spider {
209
212
  return await response.json();
210
213
  }
211
214
  else {
212
- this.handleError(response, `Failed to download files`);
215
+ this.handleError(response, `Failed to sign files`);
213
216
  }
214
217
  }
215
218
  /**
@@ -225,8 +228,8 @@ class Spider {
225
228
  * @param {object} data - The data to be inserted.
226
229
  * @returns {Promise<any>} The response from the server.
227
230
  */
228
- async postData(table, data) {
229
- return this._apiPost(`data/${table}`, data);
231
+ async postData(collection, data) {
232
+ return this._apiPost(`${config_1.APIRoutes.Data}/${collection}`, data);
230
233
  }
231
234
  /**
232
235
  * Send a GET request to retrieve data from a specified table.
@@ -235,7 +238,7 @@ class Spider {
235
238
  * @returns {Promise<any>} The response from the server.
236
239
  */
237
240
  async getData(collections, params) {
238
- return this._apiGet(`data/${collections}?${new URLSearchParams(params).toString()}`);
241
+ return this._apiGet(`${config_1.APIRoutes.Data}/${collections}?${new URLSearchParams(params).toString()}`);
239
242
  }
240
243
  /**
241
244
  * Download a record. The url is the path of the storage hash returned and not the exact website url.
@@ -244,8 +247,8 @@ class Spider {
244
247
  */
245
248
  async download(query, output) {
246
249
  const headers = this.prepareHeaders;
247
- const endpoint = `data/download?${new URLSearchParams(query).toString()}`;
248
- const response = await fetch(`${exports.BASE_API_URL}/v1/${endpoint}`, {
250
+ const endpoint = `${config_1.APIRoutes.DataDownload}?${new URLSearchParams(query).toString()}`;
251
+ const response = await fetch(`${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`, {
249
252
  method: "GET",
250
253
  headers,
251
254
  });
@@ -265,7 +268,7 @@ class Spider {
265
268
  * @returns {Promise<any>} The response from the server.
266
269
  */
267
270
  async query(query) {
268
- return this._apiGet(`data/query?${new URLSearchParams(query).toString()}`);
271
+ return this._apiGet(`${config_1.APIRoutes.DataQuery}?${new URLSearchParams(query).toString()}`);
269
272
  }
270
273
  /**
271
274
  * Send a DELETE request to remove data from a specified table.
@@ -274,7 +277,7 @@ class Spider {
274
277
  * @returns {Promise<any>} The response from the server.
275
278
  */
276
279
  async deleteData(collection, params) {
277
- return this._apiDelete(`data/${collection}?${new URLSearchParams(params).toString()}`);
280
+ return this._apiDelete(`${config_1.APIRoutes.Data}/${collection}?${new URLSearchParams(params).toString()}`);
278
281
  }
279
282
  /**
280
283
  * Prepares common headers for each API request.
package/dist/config.d.ts CHANGED
@@ -272,4 +272,36 @@ export declare enum Collection {
272
272
  Webhooks = "webhooks",
273
273
  APIKeys = "api_keys"
274
274
  }
275
+ export declare enum ApiVersion {
276
+ V1 = "v1"
277
+ }
278
+ export declare enum APIRoutes {
279
+ Crawl = "crawl",
280
+ Links = "links",
281
+ Screenshot = "screenshot",
282
+ Search = "search",
283
+ Transform = "transform",
284
+ PiplineExtractLeads = "pipeline/extract-contacts",
285
+ PiplineLabel = "pipeline/label",
286
+ Data = "data",
287
+ DataCrawlState = "data/crawl_state",
288
+ DataSignUrl = "data/sign-url",
289
+ DataDownload = "data/download",
290
+ DataQuery = "data/query"
291
+ }
292
+ export declare const APISchema: {
293
+ url: string;
294
+ versions: {
295
+ current: ApiVersion;
296
+ v1: {
297
+ routes: typeof APIRoutes;
298
+ end_date: string;
299
+ };
300
+ latest: {
301
+ routes: typeof APIRoutes;
302
+ end_date: string;
303
+ };
304
+ };
305
+ };
306
+ export declare const setBaseUrl: (url: string) => void;
275
307
  export {};
package/dist/config.js CHANGED
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Collection = void 0;
3
+ exports.setBaseUrl = exports.APISchema = exports.APIRoutes = exports.ApiVersion = exports.Collection = void 0;
4
4
  // records that you can query
5
5
  var Collection;
6
6
  (function (Collection) {
@@ -16,3 +16,58 @@ var Collection;
16
16
  Collection["Webhooks"] = "webhooks";
17
17
  Collection["APIKeys"] = "api_keys";
18
18
  })(Collection || (exports.Collection = Collection = {}));
19
+ // The API version for Spider
20
+ var ApiVersion;
21
+ (function (ApiVersion) {
22
+ ApiVersion["V1"] = "v1";
23
+ })(ApiVersion || (exports.ApiVersion = ApiVersion = {}));
24
+ // The API routes paths.
25
+ var APIRoutes;
26
+ (function (APIRoutes) {
27
+ // Crawl a website to collect the contents. Can be one page or many.
28
+ APIRoutes["Crawl"] = "crawl";
29
+ // Crawl a website to collect the links. Can be one page or many.
30
+ APIRoutes["Links"] = "links";
31
+ // Crawl a website to collect screenshots. Can be one page or many.
32
+ APIRoutes["Screenshot"] = "screenshot";
33
+ // Search for something and optionally crawl the pages or get the results of the search.
34
+ APIRoutes["Search"] = "search";
35
+ // Transform HTML to markdown or text.
36
+ APIRoutes["Transform"] = "transform";
37
+ // Pipeline extract leads for a website - emails, phones, etc.
38
+ APIRoutes["PiplineExtractLeads"] = "pipeline/extract-contacts";
39
+ // Pipeline label a website by category using AI and metadata.
40
+ APIRoutes["PiplineLabel"] = "pipeline/label";
41
+ // Dynamic collection routes.
42
+ APIRoutes["Data"] = "data";
43
+ // The last crawl state of a website.
44
+ APIRoutes["DataCrawlState"] = "data/crawl_state";
45
+ // Sign a file from storage based on the exact url path of the storage or domain - pathname.
46
+ APIRoutes["DataSignUrl"] = "data/sign-url";
47
+ // Download a file from storage based on the exact url path of the storage or domain - pathname.
48
+ APIRoutes["DataDownload"] = "data/download";
49
+ // Perform a query on the global database to grab content without crawling if available.
50
+ APIRoutes["DataQuery"] = "data/query";
51
+ })(APIRoutes || (exports.APIRoutes = APIRoutes = {}));
52
+ // The base API target info for Spider Cloud.
53
+ exports.APISchema = {
54
+ url: "https://api.spider.cloud",
55
+ versions: {
56
+ current: ApiVersion.V1,
57
+ v1: {
58
+ routes: APIRoutes,
59
+ end_date: "",
60
+ },
61
+ latest: {
62
+ routes: APIRoutes,
63
+ end_date: "",
64
+ },
65
+ },
66
+ };
67
+ // Adjust the Spider Cloud endpoint.
68
+ const setBaseUrl = (url) => {
69
+ if (url) {
70
+ exports.APISchema["url"] = url;
71
+ }
72
+ };
73
+ exports.setBaseUrl = setBaseUrl;
package/dist/index.d.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  export { Spider } from "./client";
2
+ export { Collection, setBaseUrl, APISchema } from "./config";
2
3
  export type { SpiderParams, Budget, Viewport, QueryRequest } from "./config";
3
- export { Collection } from "./config";
package/dist/index.js CHANGED
@@ -1,7 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Collection = exports.Spider = void 0;
3
+ exports.APISchema = exports.setBaseUrl = exports.Collection = exports.Spider = void 0;
4
4
  var client_1 = require("./client");
5
5
  Object.defineProperty(exports, "Spider", { enumerable: true, get: function () { return client_1.Spider; } });
6
6
  var config_1 = require("./config");
7
7
  Object.defineProperty(exports, "Collection", { enumerable: true, get: function () { return config_1.Collection; } });
8
+ Object.defineProperty(exports, "setBaseUrl", { enumerable: true, get: function () { return config_1.setBaseUrl; } });
9
+ Object.defineProperty(exports, "APISchema", { enumerable: true, get: function () { return config_1.APISchema; } });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@spider-cloud/spider-client",
3
- "version": "0.0.59",
3
+ "version": "0.0.61",
4
4
  "description": "Isomorphic Javascript SDK for Spider Cloud services",
5
5
  "scripts": {
6
6
  "test": "node --import tsx --test __tests__/*test.ts",