@spider-cloud/spider-client 0.0.40 → 0.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -6
- package/dist/client.d.ts +15 -5
- package/dist/client.js +27 -10
- package/dist/config.d.ts +8 -0
- package/dist/utils/process-chunk.d.ts +2 -0
- package/dist/utils/process-chunk.js +13 -0
- package/dist/utils/stream-reader.d.ts +2 -0
- package/dist/utils/stream-reader.js +29 -0
- package/package.json +5 -35
package/README.md
CHANGED
|
@@ -59,6 +59,34 @@ app
|
|
|
59
59
|
});
|
|
60
60
|
```
|
|
61
61
|
|
|
62
|
+
A real-world crawl example that streams the response.
|
|
63
|
+
|
|
64
|
+
```javascript
|
|
65
|
+
import { Spider } from "@spider-cloud/spider-client";
|
|
66
|
+
|
|
67
|
+
// Initialize the SDK with your API key
|
|
68
|
+
const app = new Spider({ apiKey: "YOUR_API_KEY" });
|
|
69
|
+
|
|
70
|
+
// The target URL
|
|
71
|
+
const url = "https://spider.cloud";
|
|
72
|
+
|
|
73
|
+
// Crawl a website
|
|
74
|
+
const crawlParams = {
|
|
75
|
+
limit: 5,
|
|
76
|
+
store_data: false,
|
|
77
|
+
metadata: true,
|
|
78
|
+
request: "http",
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
const stream = true;
|
|
82
|
+
|
|
83
|
+
const streamCallback = (data) => {
|
|
84
|
+
console.log(data["url"]);
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
app.crawlUrl(url, crawlParams, stream, streamCallback);
|
|
88
|
+
```
|
|
89
|
+
|
|
62
90
|
### Data Operations
|
|
63
91
|
|
|
64
92
|
The Spider client can interact with specific data tables to create, retrieve, and delete data.
|
|
@@ -124,7 +152,6 @@ spider
|
|
|
124
152
|
|
|
125
153
|
You can use [Supabase](https://supabase.com/docs/reference/javascript) to directly connect to instances and write your own logic. First, you need to install `@supabase/supabase-js` since this package does not include the dependency by default. This keeps the bundle size small and allows for lazy imports of the client.
|
|
126
154
|
|
|
127
|
-
|
|
128
155
|
```ts
|
|
129
156
|
const spiderClient = new Spider({ apiKey: process.env.SPIDER_API_KEY });
|
|
130
157
|
|
|
@@ -149,8 +176,4 @@ Contributions are always welcome! Feel free to open an issue or submit a pull re
|
|
|
149
176
|
|
|
150
177
|
## License
|
|
151
178
|
|
|
152
|
-
The Spider Cloud JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
|
|
153
|
-
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
```
|
|
179
|
+
The Spider Cloud JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
|
package/dist/client.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { SpiderParams } from "./config";
|
|
1
|
+
import { ChunkCallbackFunction, SpiderCoreResponse, SpiderParams } from "./config";
|
|
2
2
|
/**
|
|
3
3
|
* Generic params for core request.
|
|
4
4
|
*/
|
|
@@ -60,9 +60,10 @@ export declare class Spider {
|
|
|
60
60
|
* @param {string} url - The URL to start crawling.
|
|
61
61
|
* @param {GenericParams} [params={}] - Additional parameters for the crawl.
|
|
62
62
|
* @param {boolean} [stream=false] - Whether to receive the response as a stream.
|
|
63
|
+
* @param {function} [callback=function] - The callback function invoked for each chunk when streaming. If this is set together with stream, you will not get an end response.
|
|
63
64
|
* @returns {Promise<any | Response>} The result of the crawl, either structured data or a Response object if streaming.
|
|
64
65
|
*/
|
|
65
|
-
crawlUrl(url: string, params?: GenericParams, stream?: boolean): Promise<
|
|
66
|
+
crawlUrl(url: string, params?: GenericParams, stream?: boolean, cb?: ChunkCallbackFunction): Promise<SpiderCoreResponse[] | void>;
|
|
66
67
|
/**
|
|
67
68
|
* Retrieves all links from the specified URL.
|
|
68
69
|
* @param {string} url - The URL from which to gather links.
|
|
@@ -71,8 +72,8 @@ export declare class Spider {
|
|
|
71
72
|
*/
|
|
72
73
|
links(url: string, params?: {}): Promise<any>;
|
|
73
74
|
/**
|
|
74
|
-
* Takes a screenshot of the
|
|
75
|
-
* @param {string} url - The URL to screenshot.
|
|
75
|
+
* Takes a screenshot of the website starting from this URL.
|
|
76
|
+
* @param {string} url - The URL to start the screenshot.
|
|
76
77
|
* @param {GenericParams} [params={}] - Configuration parameters for the screenshot.
|
|
77
78
|
* @returns {Promise<any>} The screenshot data.
|
|
78
79
|
*/
|
|
@@ -158,7 +159,16 @@ export declare class Spider {
|
|
|
158
159
|
* Prepares common headers for each API request.
|
|
159
160
|
* @returns {HeadersInit} A headers object for fetch requests.
|
|
160
161
|
*/
|
|
161
|
-
prepareHeaders(): {
|
|
162
|
+
get prepareHeaders(): {
|
|
163
|
+
"Content-Type": string;
|
|
164
|
+
Authorization: string;
|
|
165
|
+
"User-Agent": string;
|
|
166
|
+
};
|
|
167
|
+
/**
|
|
168
|
+
* Prepares common headers for each API request with JSONl content-type suitable for streaming.
|
|
169
|
+
* @returns {HeadersInit} A headers object for fetch requests.
|
|
170
|
+
*/
|
|
171
|
+
get prepareHeadersJsonL(): {
|
|
162
172
|
"Content-Type": string;
|
|
163
173
|
Authorization: string;
|
|
164
174
|
"User-Agent": string;
|
package/dist/client.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.Spider = void 0;
|
|
4
4
|
const package_json_1 = require("../package.json");
|
|
5
5
|
const supabase_1 = require("./supabase");
|
|
6
|
+
const stream_reader_1 = require("./utils/stream-reader");
|
|
6
7
|
/**
|
|
7
8
|
* A class to interact with the Spider API.
|
|
8
9
|
*/
|
|
@@ -38,8 +39,8 @@ class Spider {
|
|
|
38
39
|
* @param {boolean} [stream=false] - Whether to stream the response back without parsing.
|
|
39
40
|
* @returns {Promise<Response | any>} The response in JSON if not streamed, or the Response object if streamed.
|
|
40
41
|
*/
|
|
41
|
-
async _apiPost(endpoint, data, stream) {
|
|
42
|
-
const headers = this.prepareHeaders();
|
|
42
|
+
async _apiPost(endpoint, data, stream, jsonl) {
|
|
43
|
+
const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
|
|
43
44
|
const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
|
|
44
45
|
method: "POST",
|
|
45
46
|
headers: headers,
|
|
@@ -61,7 +62,7 @@ class Spider {
|
|
|
61
62
|
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
62
63
|
*/
|
|
63
64
|
async _apiGet(endpoint) {
|
|
64
|
-
const headers = this.prepareHeaders();
|
|
65
|
+
const headers = this.prepareHeaders;
|
|
65
66
|
const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
|
|
66
67
|
method: "GET",
|
|
67
68
|
headers: headers,
|
|
@@ -79,7 +80,7 @@ class Spider {
|
|
|
79
80
|
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
|
|
80
81
|
*/
|
|
81
82
|
async _apiDelete(endpoint) {
|
|
82
|
-
const headers = this.prepareHeaders();
|
|
83
|
+
const headers = this.prepareHeaders;
|
|
83
84
|
const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
|
|
84
85
|
method: "DELETE",
|
|
85
86
|
headers,
|
|
@@ -105,10 +106,16 @@ class Spider {
|
|
|
105
106
|
* @param {string} url - The URL to start crawling.
|
|
106
107
|
* @param {GenericParams} [params={}] - Additional parameters for the crawl.
|
|
107
108
|
* @param {boolean} [stream=false] - Whether to receive the response as a stream.
|
|
109
|
+
* @param {function} [callback=function] - The callback function invoked for each chunk when streaming. If this is set together with stream, you will not get an end response.
|
|
108
110
|
* @returns {Promise<any | Response>} The result of the crawl, either structured data or a Response object if streaming.
|
|
109
111
|
*/
|
|
110
|
-
async crawlUrl(url, params = {}, stream = false) {
|
|
111
|
-
|
|
112
|
+
async crawlUrl(url, params = {}, stream = false, cb) {
|
|
113
|
+
const jsonl = stream && cb;
|
|
114
|
+
const res = await this._apiPost("crawl", { url: url, ...params }, stream, !!jsonl);
|
|
115
|
+
if (jsonl) {
|
|
116
|
+
return await (0, stream_reader_1.streamReader)(res, cb);
|
|
117
|
+
}
|
|
118
|
+
return res;
|
|
112
119
|
}
|
|
113
120
|
/**
|
|
114
121
|
* Retrieves all links from the specified URL.
|
|
@@ -120,8 +127,8 @@ class Spider {
|
|
|
120
127
|
return this._apiPost("links", { url: url, ...params });
|
|
121
128
|
}
|
|
122
129
|
/**
|
|
123
|
-
* Takes a screenshot of the
|
|
124
|
-
* @param {string} url - The URL to screenshot.
|
|
130
|
+
* Takes a screenshot of the website starting from this URL.
|
|
131
|
+
* @param {string} url - The URL to start the screenshot.
|
|
125
132
|
* @param {GenericParams} [params={}] - Configuration parameters for the screenshot.
|
|
126
133
|
* @returns {Promise<any>} The screenshot data.
|
|
127
134
|
*/
|
|
@@ -190,7 +197,7 @@ class Spider {
|
|
|
190
197
|
...(expiresIn && { expiresIn: expiresIn.toString() }),
|
|
191
198
|
});
|
|
192
199
|
const endpoint = `https://api.spider.cloud/v1/data/storage?${params.toString()}`;
|
|
193
|
-
const headers = this.prepareHeaders();
|
|
200
|
+
const headers = this.prepareHeaders;
|
|
194
201
|
const response = await fetch(endpoint, {
|
|
195
202
|
method: "GET",
|
|
196
203
|
headers,
|
|
@@ -243,13 +250,23 @@ class Spider {
|
|
|
243
250
|
* Prepares common headers for each API request.
|
|
244
251
|
* @returns {HeadersInit} A headers object for fetch requests.
|
|
245
252
|
*/
|
|
246
|
-
prepareHeaders() {
|
|
253
|
+
get prepareHeaders() {
|
|
247
254
|
return {
|
|
248
255
|
"Content-Type": "application/json",
|
|
249
256
|
Authorization: `Bearer ${this.apiKey}`,
|
|
250
257
|
"User-Agent": `Spider-Client/${package_json_1.version}`,
|
|
251
258
|
};
|
|
252
259
|
}
|
|
260
|
+
/**
|
|
261
|
+
* Prepares common headers for each API request with JSONl content-type suitable for streaming.
|
|
262
|
+
* @returns {HeadersInit} A headers object for fetch requests.
|
|
263
|
+
*/
|
|
264
|
+
get prepareHeadersJsonL() {
|
|
265
|
+
return {
|
|
266
|
+
...this.prepareHeaders,
|
|
267
|
+
"Content-Type": "application/jsonl",
|
|
268
|
+
};
|
|
269
|
+
}
|
|
253
270
|
/**
|
|
254
271
|
* Handles errors from API requests.
|
|
255
272
|
* @param {Response} response - The fetch response object.
|
package/dist/config.d.ts
CHANGED
|
@@ -169,3 +169,11 @@ export interface SpiderParams {
|
|
|
169
169
|
*/
|
|
170
170
|
chunking_alg?: ChunkingAlg;
|
|
171
171
|
}
|
|
172
|
+
export type SpiderCoreResponse = {
|
|
173
|
+
data?: string;
|
|
174
|
+
message?: string;
|
|
175
|
+
error?: string;
|
|
176
|
+
status?: number;
|
|
177
|
+
url?: string;
|
|
178
|
+
};
|
|
179
|
+
export type ChunkCallbackFunction = (data: SpiderCoreResponse) => void;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.processChunk = void 0;
|
|
4
|
+
const processChunk = (chunk, cb) => {
|
|
5
|
+
try {
|
|
6
|
+
cb(chunk ? JSON.parse(chunk.trimEnd()) : null);
|
|
7
|
+
return true;
|
|
8
|
+
}
|
|
9
|
+
catch (_error) {
|
|
10
|
+
return false;
|
|
11
|
+
}
|
|
12
|
+
};
|
|
13
|
+
exports.processChunk = processChunk;
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.streamReader = void 0;
|
|
4
|
+
const process_chunk_1 = require("./process-chunk");
|
|
5
|
+
// stream the response via callbacks.
|
|
6
|
+
const streamReader = async (res, cb) => {
|
|
7
|
+
var _a;
|
|
8
|
+
if (res.ok) {
|
|
9
|
+
const reader = (_a = res.body) === null || _a === void 0 ? void 0 : _a.getReader();
|
|
10
|
+
const decoder = new TextDecoder();
|
|
11
|
+
let content = "";
|
|
12
|
+
if (reader) {
|
|
13
|
+
while (true) {
|
|
14
|
+
const { done, value } = await reader.read();
|
|
15
|
+
if (done) {
|
|
16
|
+
break;
|
|
17
|
+
}
|
|
18
|
+
content += decoder.decode(value, { stream: true });
|
|
19
|
+
if ((0, process_chunk_1.processChunk)(content, cb)) {
|
|
20
|
+
content = "";
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
if (content.length > 0) {
|
|
24
|
+
(0, process_chunk_1.processChunk)(content, cb);
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
};
|
|
29
|
+
exports.streamReader = streamReader;
|
package/package.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@spider-cloud/spider-client",
|
|
3
|
-
"version": "0.0.40",
|
|
3
|
+
"version": "0.0.44",
|
|
4
4
|
"description": "A Javascript SDK for Spider Cloud services",
|
|
5
5
|
"scripts": {
|
|
6
|
-
"test": "
|
|
6
|
+
"test": "node --import tsx --test __tests__/**/*test.ts",
|
|
7
7
|
"build": "tsc",
|
|
8
8
|
"prepublishOnly": "npm test && npm run build"
|
|
9
9
|
},
|
|
@@ -23,40 +23,10 @@
|
|
|
23
23
|
"author": "Jeff Mendez<jeff@a11ywatch.com>",
|
|
24
24
|
"license": "MIT",
|
|
25
25
|
"devDependencies": {
|
|
26
|
-
"@
|
|
27
|
-
"@types/jest": "^29.5.12",
|
|
26
|
+
"@supabase/supabase-js": "^2.44.2",
|
|
28
27
|
"@types/node": "20.14.2",
|
|
29
28
|
"dotenv": "^16.4.5",
|
|
30
|
-
"
|
|
31
|
-
"typescript": "5.4.5"
|
|
32
|
-
"@supabase/supabase-js": "^2.44.2"
|
|
33
|
-
},
|
|
34
|
-
"jest": {
|
|
35
|
-
"preset": "ts-jest",
|
|
36
|
-
"testEnvironment": "node",
|
|
37
|
-
"moduleFileExtensions": [
|
|
38
|
-
"ts",
|
|
39
|
-
"tsx",
|
|
40
|
-
"js",
|
|
41
|
-
"jsx"
|
|
42
|
-
],
|
|
43
|
-
"roots": [
|
|
44
|
-
"<rootDir>/src",
|
|
45
|
-
"<rootDir>/__tests__"
|
|
46
|
-
],
|
|
47
|
-
"transform": {
|
|
48
|
-
"^.+\\\\.tsx?$": "ts-jest"
|
|
49
|
-
},
|
|
50
|
-
"testRegex": "(/__tests__/.*|\\.(test|spec))\\.(ts|tsx)$",
|
|
51
|
-
"moduleDirectories": [
|
|
52
|
-
"node_modules",
|
|
53
|
-
"src"
|
|
54
|
-
],
|
|
55
|
-
"collectCoverage": true,
|
|
56
|
-
"coverageDirectory": "coverage",
|
|
57
|
-
"coverageReporters": [
|
|
58
|
-
"text",
|
|
59
|
-
"lcov"
|
|
60
|
-
]
|
|
29
|
+
"tsx": "^4.16.2",
|
|
30
|
+
"typescript": "5.4.5"
|
|
61
31
|
}
|
|
62
32
|
}
|