@spider-cloud/spider-client 0.1.69 → 0.1.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -5
- package/dist/client.d.ts +0 -21
- package/dist/client.js +3 -33
- package/dist/config.d.ts +2 -12
- package/dist/config.js +0 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -45,7 +45,6 @@ app
|
|
|
45
45
|
const crawlParams = {
|
|
46
46
|
limit: 5,
|
|
47
47
|
proxy_enabled: true,
|
|
48
|
-
store_data: false,
|
|
49
48
|
metadata: false,
|
|
50
49
|
request: "http",
|
|
51
50
|
};
|
|
@@ -73,7 +72,6 @@ const url = "https://spider.cloud";
|
|
|
73
72
|
// Crawl a website
|
|
74
73
|
const crawlParams = {
|
|
75
74
|
limit: 5,
|
|
76
|
-
store_data: false,
|
|
77
75
|
metadata: true,
|
|
78
76
|
request: "http",
|
|
79
77
|
};
|
|
@@ -140,9 +138,6 @@ spider
|
|
|
140
138
|
- **`links(url, params)`**: Retrieve all links from the specified URL with optional parameters.
|
|
141
139
|
- **`screenshot(url, params)`**: Take a screenshot of the specified URL.
|
|
142
140
|
- **`transform(data, params)`**: Perform a fast HTML transformation to markdown or text.
|
|
143
|
-
- **`extractContacts(url, params)`**: Extract contact information from the specified URL.
|
|
144
|
-
- **`label(url, params)`**: Apply labeling to data extracted from the specified URL.
|
|
145
|
-
- **`getCrawlState(url, params)`**: Check the website crawl state.
|
|
146
141
|
- **`getCredits()`**: Retrieve account's remaining credits.
|
|
147
142
|
- **`getData(table, params)`**: Retrieve data records from the DB.
|
|
148
143
|
- **`deleteData(table, params)`**: Delete records from the DB.
|
package/dist/client.d.ts
CHANGED
|
@@ -89,27 +89,6 @@ export declare class Spider {
|
|
|
89
89
|
html: string;
|
|
90
90
|
url?: string;
|
|
91
91
|
}[], params?: RequestParamsTransform): Promise<any>;
|
|
92
|
-
/**
|
|
93
|
-
* Extracts leads from a website.
|
|
94
|
-
* @param {string} url - The URL from which to extract contacts.
|
|
95
|
-
* @param {GenericParams} [params={}] - Configuration parameters for the extraction.
|
|
96
|
-
* @returns {Promise<any>} The contact information extracted.
|
|
97
|
-
*/
|
|
98
|
-
extractContacts(url: string, params?: GenericParams): Promise<any>;
|
|
99
|
-
/**
|
|
100
|
-
* Applies labeling to data extracted from a specified URL.
|
|
101
|
-
* @param {string} url - The URL to label.
|
|
102
|
-
* @param {GenericParams} [params={}] - Configuration parameters for labeling.
|
|
103
|
-
* @returns {Promise<any>} The labeled data.
|
|
104
|
-
*/
|
|
105
|
-
label(url: string, params?: GenericParams): Promise<any>;
|
|
106
|
-
/**
|
|
107
|
-
* Check the crawl state of the website.
|
|
108
|
-
* @param {string} url - The URL to check.
|
|
109
|
-
* @param {GenericParams} [params={}] - Configuration parameters for crawl state. Can also pass in "domain" instead of the url to query.
|
|
110
|
-
* @returns {Promise<any>} The crawl state data.
|
|
111
|
-
*/
|
|
112
|
-
getCrawlState(url: string, params?: GenericParams): Promise<any>;
|
|
113
92
|
/**
|
|
114
93
|
* Create a signed url to download files from the storage.
|
|
115
94
|
* @param {string} [domain] - The domain for the user's storage. If not provided, downloads all files.
|
package/dist/client.js
CHANGED
|
@@ -153,45 +153,15 @@ class Spider {
|
|
|
153
153
|
* @param {object} [params={}] - Configuration parameters for the transformation.
|
|
154
154
|
* @returns {Promise<any>} The transformation result.
|
|
155
155
|
*/
|
|
156
|
-
async transform(data, params
|
|
156
|
+
async transform(data, params) {
|
|
157
157
|
var _a;
|
|
158
158
|
return this._apiPost(config_1.APIRoutes.Transform, {
|
|
159
|
-
...params,
|
|
160
|
-
data: params.data && Array.isArray(params.data) && ((_a = params.data) === null || _a === void 0 ? void 0 : _a.length)
|
|
159
|
+
...(params ? params : {}),
|
|
160
|
+
data: (params === null || params === void 0 ? void 0 : params.data) && Array.isArray(params.data) && ((_a = params.data) === null || _a === void 0 ? void 0 : _a.length)
|
|
161
161
|
? params.data
|
|
162
162
|
: data,
|
|
163
163
|
});
|
|
164
164
|
}
|
|
165
|
-
/**
|
|
166
|
-
* Extracts leads from a website.
|
|
167
|
-
* @param {string} url - The URL from which to extract contacts.
|
|
168
|
-
* @param {GenericParams} [params={}] - Configuration parameters for the extraction.
|
|
169
|
-
* @returns {Promise<any>} The contact information extracted.
|
|
170
|
-
*/
|
|
171
|
-
async extractContacts(url, params = {}) {
|
|
172
|
-
return this._apiPost(config_1.APIRoutes.PiplineExtractLeads, {
|
|
173
|
-
url: url,
|
|
174
|
-
...params,
|
|
175
|
-
});
|
|
176
|
-
}
|
|
177
|
-
/**
|
|
178
|
-
* Applies labeling to data extracted from a specified URL.
|
|
179
|
-
* @param {string} url - The URL to label.
|
|
180
|
-
* @param {GenericParams} [params={}] - Configuration parameters for labeling.
|
|
181
|
-
* @returns {Promise<any>} The labeled data.
|
|
182
|
-
*/
|
|
183
|
-
async label(url, params = {}) {
|
|
184
|
-
return this._apiPost(config_1.APIRoutes.PiplineLabel, { url: url, ...params });
|
|
185
|
-
}
|
|
186
|
-
/**
|
|
187
|
-
* Check the crawl state of the website.
|
|
188
|
-
* @param {string} url - The URL to check.
|
|
189
|
-
* @param {GenericParams} [params={}] - Configuration parameters for crawl state. Can also pass in "domain" instead of the url to query.
|
|
190
|
-
* @returns {Promise<any>} The crawl state data.
|
|
191
|
-
*/
|
|
192
|
-
async getCrawlState(url, params = {}) {
|
|
193
|
-
return this._apiPost(config_1.APIRoutes.DataCrawlState, { url: url, ...params });
|
|
194
|
-
}
|
|
195
165
|
/**
|
|
196
166
|
* Create a signed url to download files from the storage.
|
|
197
167
|
* @param {string} [domain] - The domain for the user's storage. If not provided, downloads all files.
|
package/dist/config.d.ts
CHANGED
|
@@ -271,18 +271,10 @@ export interface SpiderParams {
|
|
|
271
271
|
* The user agent string to be used for the request.
|
|
272
272
|
*/
|
|
273
273
|
user_agent?: string;
|
|
274
|
-
/**
|
|
275
|
-
* Specifies whether the response data should be stored.
|
|
276
|
-
*/
|
|
277
|
-
store_data?: boolean;
|
|
278
274
|
/**
|
|
279
275
|
* Use webhooks to send data.
|
|
280
276
|
*/
|
|
281
277
|
webhooks?: WebhookSettings;
|
|
282
|
-
/**
|
|
283
|
-
* Configuration settings for GPT (general purpose texture mappings).
|
|
284
|
-
*/
|
|
285
|
-
gpt_config?: Record<string, any>;
|
|
286
278
|
/**
|
|
287
279
|
* Specifies whether to use fingerprinting protection.
|
|
288
280
|
*/
|
|
@@ -395,7 +387,7 @@ export interface SpiderParams {
|
|
|
395
387
|
*/
|
|
396
388
|
evaluate_on_new_document?: string;
|
|
397
389
|
/**
|
|
398
|
-
* Runs the request using lite_mode:Lite mode reduces data transfer costs by
|
|
390
|
+
* Runs the request using lite_mode:Lite mode reduces data transfer costs by 50%, with trade-offs in speed, accuracy,
|
|
399
391
|
* geo-targeting, and reliability. It’s best suited for non-urgent data collection or when
|
|
400
392
|
* targeting websites with minimal anti-bot protections.
|
|
401
393
|
*/
|
|
@@ -421,7 +413,7 @@ export interface SpiderParams {
|
|
|
421
413
|
*/
|
|
422
414
|
proxy?: Proxy;
|
|
423
415
|
/**
|
|
424
|
-
* Use a remote proxy at ~
|
|
416
|
+
* Use a remote proxy at ~50% reduced cost for file downloads.
|
|
425
417
|
* This requires bringing your own proxy (e.g., static IP tunnel).
|
|
426
418
|
*/
|
|
427
419
|
remote_proxy?: string;
|
|
@@ -504,8 +496,6 @@ export declare enum APIRoutes {
|
|
|
504
496
|
Screenshot = "screenshot",
|
|
505
497
|
Search = "search",
|
|
506
498
|
Transform = "transform",
|
|
507
|
-
PiplineExtractLeads = "pipeline/extract-contacts",
|
|
508
|
-
PiplineLabel = "pipeline/label",
|
|
509
499
|
Data = "data",
|
|
510
500
|
DataCrawlState = "data/crawl_state",
|
|
511
501
|
DataSignUrl = "data/sign-url",
|
package/dist/config.js
CHANGED
|
@@ -40,10 +40,6 @@ var APIRoutes;
|
|
|
40
40
|
APIRoutes["Search"] = "search";
|
|
41
41
|
// Transform HTML to markdown or text.
|
|
42
42
|
APIRoutes["Transform"] = "transform";
|
|
43
|
-
// Pipeline extract leads for a website - emails, phones, etc.
|
|
44
|
-
APIRoutes["PiplineExtractLeads"] = "pipeline/extract-contacts";
|
|
45
|
-
// Pipeline label a website by category using AI and metadata.
|
|
46
|
-
APIRoutes["PiplineLabel"] = "pipeline/label";
|
|
47
43
|
// Dynamic collection routes.
|
|
48
44
|
APIRoutes["Data"] = "data";
|
|
49
45
|
// The last crawl state of a website.
|