@spider-cloud/spider-client 0.0.58 → 0.0.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -176,4 +176,4 @@ Contributions are always welcome! Feel free to open an issue or submit a pull re
176
176
 
177
177
  ## License
178
178
 
179
- The Spider Cloud JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
179
+ The Spider Cloud JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
package/dist/client.d.ts CHANGED
@@ -1,4 +1,5 @@
1
- import { ChunkCallbackFunction, QueryRequest, SpiderCoreResponse, SpiderParams } from "./config";
1
+ import { ChunkCallbackFunction, Collection, QueryRequest, SpiderCoreResponse, SpiderParams } from "./config";
2
+ export declare const baseUrl: string;
2
3
  /**
3
4
  * Generic params for core request.
4
5
  */
@@ -96,7 +97,7 @@ export declare class Spider {
96
97
  url?: string;
97
98
  }[], params?: {}): Promise<any>;
98
99
  /**
99
- * Extracts contact information from the specified URL.
100
+ * Extracts leads from a website.
100
101
  * @param {string} url - The URL from which to extract contacts.
101
102
  * @param {GenericParams} [params={}] - Configuration parameters for the extraction.
102
103
  * @returns {Promise<any>} The contact information extracted.
@@ -124,11 +125,13 @@ export declare class Spider {
124
125
 
125
126
  * @returns {Promise<any>} The parsed JSON body of the signed URL response.
126
127
  */
127
- createSignedUrl(domain?: string, options?: {
128
+ createSignedUrl(url?: string, options?: {
128
129
  page?: number;
129
130
  limit?: number;
130
131
  expiresIn?: number;
131
- }, raw?: boolean): Promise<Response>;
132
+ domain?: string;
133
+ pathname?: string;
134
+ }): Promise<any>;
132
135
  /**
133
136
  * Retrieves the number of credits available on the account.
134
137
  * @returns {Promise<any>} The current credit balance.
@@ -140,14 +143,20 @@ export declare class Spider {
140
143
  * @param {object} data - The data to be inserted.
141
144
  * @returns {Promise<any>} The response from the server.
142
145
  */
143
- postData(table: string, data: GenericParams | Record<string, any>): Promise<any>;
146
+ postData(collection: Collection, data: GenericParams | Record<string, any>): Promise<any>;
144
147
  /**
145
148
  * Send a GET request to retrieve data from a specified table.
146
- * @param {string} table - The table name in the database.
149
+ * @param {Collection} collections - The collection (table) name in the database.
147
150
  * @param {object} params - The query parameters for data retrieval.
148
151
  * @returns {Promise<any>} The response from the server.
149
152
  */
150
- getData(table: string, params: GenericParams | Record<string, any>): Promise<any>;
153
+ getData(collections: Collection, params: GenericParams | Record<string, any>): Promise<any>;
154
+ /**
155
+ * Download a record. The url is the path of the storage hash returned and not the exact website url.
156
+ * @param {QueryRequest} query - The query parameters identifying the record to download.
157
+ * @returns {Promise<any>} The download response from the server.
158
+ */
159
+ download(query: QueryRequest, output?: "text" | "blob"): Promise<any>;
151
160
  /**
152
161
  * Perform a query to get a document.
153
162
  * @param {QueryRequest} query - The query parameters for data retrieval.
@@ -156,11 +165,11 @@ export declare class Spider {
156
165
  query(query: QueryRequest): Promise<any>;
157
166
  /**
158
167
  * Send a DELETE request to remove data from a specified table.
159
- * @param {string} table - The table name in the database.
168
+ * @param {Collection} collection - The collection (table) name in the database.
160
169
  * @param {object} params - Parameters to identify records to delete.
161
170
  * @returns {Promise<any>} The response from the server.
162
171
  */
163
- deleteData(table: string, params: GenericParams | Record<string, any>): Promise<any>;
172
+ deleteData(collection: Collection, params: GenericParams | Record<string, any>): Promise<any>;
164
173
  /**
165
174
  * Prepares common headers for each API request.
166
175
  * @returns {HeadersInit} A headers object for fetch requests.
package/dist/client.js CHANGED
@@ -1,9 +1,11 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Spider = void 0;
3
+ exports.Spider = exports.baseUrl = void 0;
4
+ const config_1 = require("./config");
4
5
  const package_json_1 = require("../package.json");
5
6
  const supabase_1 = require("./supabase");
6
7
  const stream_reader_1 = require("./utils/stream-reader");
8
+ exports.baseUrl = config_1.APISchema["url"];
7
9
  /**
8
10
  * A class to interact with the Spider API.
9
11
  */
@@ -41,7 +43,7 @@ class Spider {
41
43
  */
42
44
  async _apiPost(endpoint, data, stream, jsonl) {
43
45
  const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
44
- const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
46
+ const response = await fetch(`${exports.baseUrl}/v1/${endpoint}`, {
45
47
  method: "POST",
46
48
  headers: headers,
47
49
  body: JSON.stringify(data),
@@ -63,7 +65,7 @@ class Spider {
63
65
  */
64
66
  async _apiGet(endpoint) {
65
67
  const headers = this.prepareHeaders;
66
- const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
68
+ const response = await fetch(`${exports.baseUrl}/v1/${endpoint}`, {
67
69
  method: "GET",
68
70
  headers: headers,
69
71
  });
@@ -81,7 +83,7 @@ class Spider {
81
83
  */
82
84
  async _apiDelete(endpoint) {
83
85
  const headers = this.prepareHeaders;
84
- const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
86
+ const response = await fetch(`${exports.baseUrl}/v1/${endpoint}`, {
85
87
  method: "DELETE",
86
88
  headers,
87
89
  });
@@ -99,7 +101,7 @@ class Spider {
99
101
  * @returns {Promise<any>} The scraped data from the URL.
100
102
  */
101
103
  async scrapeUrl(url, params = {}) {
102
- return this._apiPost("crawl", { url: url, limit: 1, ...params });
104
+ return this._apiPost(config_1.APIRoutes.Crawl, { url: url, limit: 1, ...params });
103
105
  }
104
106
  /**
105
107
  * Initiates a crawling job starting from the specified URL.
@@ -111,7 +113,7 @@ class Spider {
111
113
  */
112
114
  async crawlUrl(url, params = {}, stream = false, cb) {
113
115
  const jsonl = stream && cb;
114
- const res = await this._apiPost("crawl", { url: url, ...params }, stream, !!jsonl);
116
+ const res = await this._apiPost(config_1.APIRoutes.Crawl, { url: url, ...params }, stream, !!jsonl);
115
117
  if (jsonl) {
116
118
  return await (0, stream_reader_1.streamReader)(res, cb);
117
119
  }
@@ -124,7 +126,7 @@ class Spider {
124
126
  * @returns {Promise<any>} A list of links extracted from the URL.
125
127
  */
126
128
  async links(url, params = {}) {
127
- return this._apiPost("links", { url: url, ...params });
129
+ return this._apiPost(config_1.APIRoutes.Links, { url: url, ...params });
128
130
  }
129
131
  /**
130
132
  * Takes a screenshot of the website starting from this URL.
@@ -133,7 +135,7 @@ class Spider {
133
135
  * @returns {Promise<any>} The screenshot data.
134
136
  */
135
137
  async screenshot(url, params = {}) {
136
- return this._apiPost("screenshot", { url: url, ...params });
138
+ return this._apiPost(config_1.APIRoutes.Screenshot, { url: url, ...params });
137
139
  }
138
140
  /**
139
141
  * Perform a search and gather a list of websites to start crawling and collect resources.
@@ -142,7 +144,7 @@ class Spider {
142
144
  * @returns {Promise<any>} The result of the crawl, either structured data or a Response object if streaming.
143
145
  */
144
146
  async search(q, params = {}) {
145
- return this._apiPost("search", { search: q, ...params });
147
+ return this._apiPost(config_1.APIRoutes.Search, { search: q, ...params });
146
148
  }
147
149
  /**
148
150
  * Transform HTML to Markdown or text. You can send up to 10MB of data at once.
@@ -151,16 +153,19 @@ class Spider {
151
153
  * @returns {Promise<any>} The transformation result.
152
154
  */
153
155
  async transform(data, params = {}) {
154
- return this._apiPost("transform", { data, ...params });
156
+ return this._apiPost(config_1.APIRoutes.Transform, { data, ...params });
155
157
  }
156
158
  /**
157
- * Extracts contact information from the specified URL.
159
+ * Extracts leads from a website.
158
160
  * @param {string} url - The URL from which to extract contacts.
159
161
  * @param {GenericParams} [params={}] - Configuration parameters for the extraction.
160
162
  * @returns {Promise<any>} The contact information extracted.
161
163
  */
162
164
  async extractContacts(url, params = {}) {
163
- return this._apiPost("pipeline/extract-contacts", { url: url, ...params });
165
+ return this._apiPost(config_1.APIRoutes.PiplineExtractLeads, {
166
+ url: url,
167
+ ...params,
168
+ });
164
169
  }
165
170
  /**
166
171
  * Applies labeling to data extracted from a specified URL.
@@ -169,7 +174,7 @@ class Spider {
169
174
  * @returns {Promise<any>} The labeled data.
170
175
  */
171
176
  async label(url, params = {}) {
172
- return this._apiPost("pipeline/label", { url: url, ...params });
177
+ return this._apiPost(config_1.APIRoutes.PiplineLabel, { url: url, ...params });
173
178
  }
174
179
  /**
175
180
  * Check the crawl state of the website.
@@ -178,7 +183,7 @@ class Spider {
178
183
  * @returns {Promise<any>} The crawl state data.
179
184
  */
180
185
  async getCrawlState(url, params = {}) {
181
- return this._apiPost("data/crawl_state", { url: url, ...params });
186
+ return this._apiPost(config_1.APIRoutes.DataCrawlState, { url: url, ...params });
182
187
  }
183
188
  /**
184
189
  * Create a signed url to download files from the storage.
@@ -188,36 +193,35 @@ class Spider {
188
193
 
189
194
  * @returns {Promise<any>} The parsed JSON body of the signed URL response.
190
195
  */
191
- async createSignedUrl(domain, options, raw) {
192
- const { page, limit, expiresIn } = options !== null && options !== void 0 ? options : {};
196
+ async createSignedUrl(url, options) {
197
+ const { page, limit, expiresIn, domain, pathname } = options !== null && options !== void 0 ? options : {};
193
198
  const params = new URLSearchParams({
199
+ ...(url && { url }),
194
200
  ...(domain && { domain }),
201
+ ...(pathname && { pathname }),
195
202
  ...(page && { page: page.toString() }),
196
203
  ...(limit && { limit: limit.toString() }),
197
204
  ...(expiresIn && { expiresIn: expiresIn.toString() }),
198
205
  });
199
- const endpoint = `https://api.spider.cloud/v1/data/storage?${params.toString()}`;
206
+ const endpoint = `${exports.baseUrl}/${config_1.APIRoutes.DataSignUrl}?${params.toString()}`;
200
207
  const headers = this.prepareHeaders;
201
208
  const response = await fetch(endpoint, {
202
209
  method: "GET",
203
210
  headers,
204
211
  });
205
- if (!raw) {
206
- if (response.ok) {
207
- return response.json();
208
- }
209
- else {
210
- this.handleError(response, `Failed to download files`);
211
- }
212
+ if (response.ok) {
213
+ return await response.json();
214
+ }
215
+ else {
216
+ this.handleError(response, `Failed to sign files`);
212
217
  }
213
- return response;
214
218
  }
215
219
  /**
216
220
  * Retrieves the number of credits available on the account.
217
221
  * @returns {Promise<any>} The current credit balance.
218
222
  */
219
223
  async getCredits() {
220
- return this._apiGet("credits");
224
+ return this._apiGet("data/credits");
221
225
  }
222
226
  /**
223
227
  * Send a POST request to insert data into a specified table.
@@ -225,17 +229,39 @@ class Spider {
225
229
  * @param {object} data - The data to be inserted.
226
230
  * @returns {Promise<any>} The response from the server.
227
231
  */
228
- async postData(table, data) {
229
- return this._apiPost(`data/${table}`, data);
232
+ async postData(collection, data) {
233
+ return this._apiPost(`${config_1.APIRoutes.Data}/${collection}`, data);
230
234
  }
231
235
  /**
232
236
  * Send a GET request to retrieve data from a specified table.
233
- * @param {string} table - The table name in the database.
237
+ * @param {Collection} collections - The collection (table) name in the database.
234
238
  * @param {object} params - The query parameters for data retrieval.
235
239
  * @returns {Promise<any>} The response from the server.
236
240
  */
237
- async getData(table, params) {
238
- return this._apiGet(`data/${table}?${new URLSearchParams(params).toString()}`);
241
+ async getData(collections, params) {
242
+ return this._apiGet(`${config_1.APIRoutes.Data}/${collections}?${new URLSearchParams(params).toString()}`);
243
+ }
244
+ /**
245
+ * Download a record. The url is the path of the storage hash returned and not the exact website url.
246
+ * @param {QueryRequest} query - The query parameters identifying the record to download.
247
+ * @returns {Promise<any>} The download response from the server.
248
+ */
249
+ async download(query, output) {
250
+ const headers = this.prepareHeaders;
251
+ const endpoint = `${config_1.APIRoutes.DataDownload}?${new URLSearchParams(query).toString()}`;
252
+ const response = await fetch(`${exports.baseUrl}/v1/${endpoint}`, {
253
+ method: "GET",
254
+ headers,
255
+ });
256
+ if (response.ok) {
257
+ if (output === "text") {
258
+ return await response.text();
259
+ }
260
+ return await response.blob();
261
+ }
262
+ else {
263
+ this.handleError(response, `get from ${endpoint}`);
264
+ }
239
265
  }
240
266
  /**
241
267
  * Perform a query to get a document.
@@ -243,16 +269,16 @@ class Spider {
243
269
  * @returns {Promise<any>} The response from the server.
244
270
  */
245
271
  async query(query) {
246
- return this._apiGet(`data/query?${new URLSearchParams(query).toString()}`);
272
+ return this._apiGet(`${config_1.APIRoutes.DataQuery}?${new URLSearchParams(query).toString()}`);
247
273
  }
248
274
  /**
249
275
  * Send a DELETE request to remove data from a specified table.
250
- * @param {string} table - The table name in the database.
276
+ * @param {Collection} collection - The collection (table) name in the database.
251
277
  * @param {object} params - Parameters to identify records to delete.
252
278
  * @returns {Promise<any>} The response from the server.
253
279
  */
254
- async deleteData(table, params) {
255
- return this._apiDelete(`data/${table}?${new URLSearchParams(params).toString()}`);
280
+ async deleteData(collection, params) {
281
+ return this._apiDelete(`${config_1.APIRoutes.Data}/${collection}?${new URLSearchParams(params).toString()}`);
256
282
  }
257
283
  /**
258
284
  * Prepares common headers for each API request.
package/dist/config.d.ts CHANGED
@@ -260,4 +260,48 @@ export type SpiderCoreResponse = {
260
260
  url?: string;
261
261
  };
262
262
  export type ChunkCallbackFunction = (data: SpiderCoreResponse) => void;
263
+ export declare enum Collection {
264
+ Websites = "websites",
265
+ Pages = "pages",
266
+ PagesMetadata = "pages_metadata",
267
+ Contacts = "contacts",
268
+ CrawlState = "crawl_state",
269
+ CrawlLogs = "crawl_logs",
270
+ Profiles = "profiles",
271
+ Credits = "credits",
272
+ Webhooks = "webhooks",
273
+ APIKeys = "api_keys"
274
+ }
275
+ declare enum ApiVersion {
276
+ V1 = "v1"
277
+ }
278
+ export declare enum APIRoutes {
279
+ Crawl = "crawl",
280
+ Links = "links",
281
+ Screenshot = "screenshot",
282
+ Search = "search",
283
+ Transform = "transform",
284
+ PiplineExtractLeads = "pipeline/extract-contacts",
285
+ PiplineLabel = "pipeline/label",
286
+ Data = "data",
287
+ DataCrawlState = "data/crawl_state",
288
+ DataSignUrl = "data/sign-url",
289
+ DataDownload = "data/download",
290
+ DataQuery = "data/query"
291
+ }
292
+ export declare const APISchema: {
293
+ url: string;
294
+ versions: {
295
+ current: ApiVersion;
296
+ v1: {
297
+ routes: typeof APIRoutes;
298
+ end_date: string;
299
+ };
300
+ latest: {
301
+ routes: typeof APIRoutes;
302
+ end_date: string;
303
+ };
304
+ };
305
+ };
306
+ export declare const setBaseUrl: (url: string) => void;
263
307
  export {};
package/dist/config.js CHANGED
@@ -1,2 +1,73 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.setBaseUrl = exports.APISchema = exports.APIRoutes = exports.Collection = void 0;
4
+ // records that you can query
5
+ var Collection;
6
+ (function (Collection) {
7
+ Collection["Websites"] = "websites";
8
+ Collection["Pages"] = "pages";
9
+ Collection["PagesMetadata"] = "pages_metadata";
10
+ // Leads
11
+ Collection["Contacts"] = "contacts";
12
+ Collection["CrawlState"] = "crawl_state";
13
+ Collection["CrawlLogs"] = "crawl_logs";
14
+ Collection["Profiles"] = "profiles";
15
+ Collection["Credits"] = "credits";
16
+ Collection["Webhooks"] = "webhooks";
17
+ Collection["APIKeys"] = "api_keys";
18
+ })(Collection || (exports.Collection = Collection = {}));
19
+ // The API version for Spider
20
+ var ApiVersion;
21
+ (function (ApiVersion) {
22
+ ApiVersion["V1"] = "v1";
23
+ })(ApiVersion || (ApiVersion = {}));
24
+ // The API routes paths.
25
+ var APIRoutes;
26
+ (function (APIRoutes) {
27
+ // Crawl a website to collect the contents. Can be one page or many.
28
+ APIRoutes["Crawl"] = "crawl";
29
+ // Crawl a website to collect the links. Can be one page or many.
30
+ APIRoutes["Links"] = "links";
31
+ // Crawl a website to collect screenshots. Can be one page or many.
32
+ APIRoutes["Screenshot"] = "screenshot";
33
+ // Search for something and optionally crawl the pages or get the results of the search.
34
+ APIRoutes["Search"] = "search";
35
+ // Transform HTML to markdown or text.
36
+ APIRoutes["Transform"] = "transform";
37
+ // Pipeline extract leads for a website - emails, phones, etc.
38
+ APIRoutes["PiplineExtractLeads"] = "pipeline/extract-contacts";
39
+ // Pipeline label a website by category using AI and metadata.
40
+ APIRoutes["PiplineLabel"] = "pipeline/label";
41
+ // Dynamic collection routes.
42
+ APIRoutes["Data"] = "data";
43
+ // The last crawl state of a website.
44
+ APIRoutes["DataCrawlState"] = "data/crawl_state";
45
+ // Sign a file from storage based on the exact url path of the storage or domain - pathname.
46
+ APIRoutes["DataSignUrl"] = "data/sign-url";
47
+ // Download a file from storage based on the exact url path of the storage or domain - pathname.
48
+ APIRoutes["DataDownload"] = "data/download";
49
+ // Perform a query on the global database to grab content without crawling if available.
50
+ APIRoutes["DataQuery"] = "data/query";
51
+ })(APIRoutes || (exports.APIRoutes = APIRoutes = {}));
52
+ // The base API target info for Spider Cloud.
53
+ exports.APISchema = {
54
+ url: "https://api.spider.cloud",
55
+ versions: {
56
+ current: ApiVersion.V1,
57
+ v1: {
58
+ routes: APIRoutes,
59
+ end_date: "",
60
+ },
61
+ latest: {
62
+ routes: APIRoutes,
63
+ end_date: "",
64
+ },
65
+ },
66
+ };
67
+ // Adjust the Spider Cloud endpoint.
68
+ const setBaseUrl = (url) => {
69
+ if (url) {
70
+ exports.APISchema["url"] = url;
71
+ }
72
+ };
73
+ exports.setBaseUrl = setBaseUrl;
package/dist/index.d.ts CHANGED
@@ -1,2 +1,3 @@
1
1
  export { Spider } from "./client";
2
+ export { Collection, setBaseUrl, APISchema } from "./config";
2
3
  export type { SpiderParams, Budget, Viewport, QueryRequest } from "./config";
package/dist/index.js CHANGED
@@ -1,5 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Spider = void 0;
3
+ exports.APISchema = exports.setBaseUrl = exports.Collection = exports.Spider = void 0;
4
4
  var client_1 = require("./client");
5
5
  Object.defineProperty(exports, "Spider", { enumerable: true, get: function () { return client_1.Spider; } });
6
+ var config_1 = require("./config");
7
+ Object.defineProperty(exports, "Collection", { enumerable: true, get: function () { return config_1.Collection; } });
8
+ Object.defineProperty(exports, "setBaseUrl", { enumerable: true, get: function () { return config_1.setBaseUrl; } });
9
+ Object.defineProperty(exports, "APISchema", { enumerable: true, get: function () { return config_1.APISchema; } });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@spider-cloud/spider-client",
3
- "version": "0.0.58",
3
+ "version": "0.0.60",
4
4
  "description": "Isomorphic Javascript SDK for Spider Cloud services",
5
5
  "scripts": {
6
6
  "test": "node --import tsx --test __tests__/*test.ts",