@spider-cloud/spider-client 0.0.40 → 0.0.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -59,6 +59,34 @@ app
59
59
  });
60
60
  ```
61
61
 
62
+ A real-world crawl example that streams the response.
63
+
64
+ ```javascript
65
+ import { Spider } from "@spider-cloud/spider-client";
66
+
67
+ // Initialize the SDK with your API key
68
+ const app = new Spider({ apiKey: "YOUR_API_KEY" });
69
+
70
+ // The target URL
71
+ const url = "https://spider.cloud";
72
+
73
+ // Crawl a website
74
+ const crawlParams = {
75
+ limit: 5,
76
+ store_data: false,
77
+ metadata: true,
78
+ request: "http",
79
+ };
80
+
81
+ const stream = true;
82
+
83
+ const streamCallback = (data) => {
84
+ console.log(data["url"]);
85
+ };
86
+
87
+ app.crawlUrl(url, crawlParams, stream, streamCallback);
88
+ ```
89
+
62
90
  ### Data Operations
63
91
 
64
92
  The Spider client can interact with specific data tables to create, retrieve, and delete data.
@@ -124,7 +152,6 @@ spider
124
152
 
125
153
  You can use [Supabase](https://supabase.com/docs/reference/javascript) to directly connect to instances and write your own logic. First, you need to install `@supabase/supabase-js` since this package does not include the dependency by default. This keeps the bundle size small and allows for lazy imports of the client.
126
154
 
127
-
128
155
  ```ts
129
156
  const spiderClient = new Spider({ apiKey: process.env.SPIDER_API_KEY });
130
157
 
@@ -149,8 +176,4 @@ Contributions are always welcome! Feel free to open an issue or submit a pull re
149
176
 
150
177
  ## License
151
178
 
152
- The Spider Cloud JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
153
-
154
- ```
155
-
156
- ```
179
+ The Spider Cloud JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
package/dist/client.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { SpiderParams } from "./config";
1
+ import { ChunkCallbackFunction, SpiderCoreResponse, SpiderParams } from "./config";
2
2
  /**
3
3
  * Generic params for core request.
4
4
  */
@@ -60,9 +60,10 @@ export declare class Spider {
60
60
  * @param {string} url - The URL to start crawling.
61
61
  * @param {GenericParams} [params={}] - Additional parameters for the crawl.
62
62
  * @param {boolean} [stream=false] - Whether to receive the response as a stream.
63
+ * @param {ChunkCallbackFunction} [cb] - Callback invoked for each streamed chunk. When it is provided together with `stream`, the returned promise resolves without an end response.
63
64
  * @returns {Promise<any | Response>} The result of the crawl, either structured data or a Response object if streaming.
64
65
  */
65
- crawlUrl(url: string, params?: GenericParams, stream?: boolean): Promise<any>;
66
+ crawlUrl(url: string, params?: GenericParams, stream?: boolean, cb?: ChunkCallbackFunction): Promise<SpiderCoreResponse[] | void>;
66
67
  /**
67
68
  * Retrieves all links from the specified URL.
68
69
  * @param {string} url - The URL from which to gather links.
@@ -71,8 +72,8 @@ export declare class Spider {
71
72
  */
72
73
  links(url: string, params?: {}): Promise<any>;
73
74
  /**
74
- * Takes a screenshot of the specified URL.
75
- * @param {string} url - The URL to screenshot.
75
+ * Takes a screenshot of the website starting from this URL.
76
+ * @param {string} url - The URL to start the screenshot.
76
77
  * @param {GenericParams} [params={}] - Configuration parameters for the screenshot.
77
78
  * @returns {Promise<any>} The screenshot data.
78
79
  */
@@ -158,7 +159,16 @@ export declare class Spider {
158
159
  * Prepares common headers for each API request.
159
160
  * @returns {HeadersInit} A headers object for fetch requests.
160
161
  */
161
- prepareHeaders(): {
162
+ get prepareHeaders(): {
163
+ "Content-Type": string;
164
+ Authorization: string;
165
+ "User-Agent": string;
166
+ };
167
+ /**
168
+ * Prepares common headers for each API request with the JSONL (`application/jsonl`) Content-Type, suitable for streaming.
169
+ * @returns {HeadersInit} A headers object for fetch requests.
170
+ */
171
+ get prepareHeadersJsonL(): {
162
172
  "Content-Type": string;
163
173
  Authorization: string;
164
174
  "User-Agent": string;
package/dist/client.js CHANGED
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.Spider = void 0;
4
4
  const package_json_1 = require("../package.json");
5
5
  const supabase_1 = require("./supabase");
6
+ const stream_reader_1 = require("./utils/stream-reader");
6
7
  /**
7
8
  * A class to interact with the Spider API.
8
9
  */
@@ -38,8 +39,8 @@ class Spider {
38
39
  * @param {boolean} [stream=false] - Whether to stream the response back without parsing.
39
40
  * @returns {Promise<Response | any>} The response in JSON if not streamed, or the Response object if streamed.
40
41
  */
41
- async _apiPost(endpoint, data, stream = false) {
42
- const headers = this.prepareHeaders();
42
+ async _apiPost(endpoint, data, stream, jsonl) {
43
+ const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
43
44
  const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
44
45
  method: "POST",
45
46
  headers: headers,
@@ -61,7 +62,7 @@ class Spider {
61
62
  * @returns {Promise<any>} The data returned from the endpoint in JSON format.
62
63
  */
63
64
  async _apiGet(endpoint) {
64
- const headers = this.prepareHeaders();
65
+ const headers = this.prepareHeaders;
65
66
  const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
66
67
  method: "GET",
67
68
  headers: headers,
@@ -79,7 +80,7 @@ class Spider {
79
80
  * @returns {Promise<any>} The data returned from the endpoint in JSON format.
80
81
  */
81
82
  async _apiDelete(endpoint) {
82
- const headers = this.prepareHeaders();
83
+ const headers = this.prepareHeaders;
83
84
  const response = await fetch(`https://api.spider.cloud/v1/${endpoint}`, {
84
85
  method: "DELETE",
85
86
  headers,
@@ -105,10 +106,16 @@ class Spider {
105
106
  * @param {string} url - The URL to start crawling.
106
107
  * @param {GenericParams} [params={}] - Additional parameters for the crawl.
107
108
  * @param {boolean} [stream=false] - Whether to receive the response as a stream.
109
+ * @param {function} [cb] - Callback invoked for each streamed chunk. When it is provided together with `stream`, the returned promise resolves without an end response.
108
110
  * @returns {Promise<any | Response>} The result of the crawl, either structured data or a Response object if streaming.
109
111
  */
110
- async crawlUrl(url, params = {}, stream = false) {
111
- return this._apiPost("crawl", { url: url, ...params }, stream);
112
+ async crawlUrl(url, params = {}, stream = false, cb) {
113
+ const jsonl = stream && cb;
114
+ const res = await this._apiPost("crawl", { url: url, ...params }, stream, !!jsonl);
115
+ if (jsonl) {
116
+ return await (0, stream_reader_1.streamReader)(res, cb);
117
+ }
118
+ return res;
112
119
  }
113
120
  /**
114
121
  * Retrieves all links from the specified URL.
@@ -120,8 +127,8 @@ class Spider {
120
127
  return this._apiPost("links", { url: url, ...params });
121
128
  }
122
129
  /**
123
- * Takes a screenshot of the specified URL.
124
- * @param {string} url - The URL to screenshot.
130
+ * Takes a screenshot of the website starting from this URL.
131
+ * @param {string} url - The URL to start the screenshot.
125
132
  * @param {GenericParams} [params={}] - Configuration parameters for the screenshot.
126
133
  * @returns {Promise<any>} The screenshot data.
127
134
  */
@@ -190,7 +197,7 @@ class Spider {
190
197
  ...(expiresIn && { expiresIn: expiresIn.toString() }),
191
198
  });
192
199
  const endpoint = `https://api.spider.cloud/v1/data/storage?${params.toString()}`;
193
- const headers = this.prepareHeaders();
200
+ const headers = this.prepareHeaders;
194
201
  const response = await fetch(endpoint, {
195
202
  method: "GET",
196
203
  headers,
@@ -243,13 +250,23 @@ class Spider {
243
250
  * Prepares common headers for each API request.
244
251
  * @returns {HeadersInit} A headers object for fetch requests.
245
252
  */
246
- prepareHeaders() {
253
+ get prepareHeaders() {
247
254
  return {
248
255
  "Content-Type": "application/json",
249
256
  Authorization: `Bearer ${this.apiKey}`,
250
257
  "User-Agent": `Spider-Client/${package_json_1.version}`,
251
258
  };
252
259
  }
260
+ /**
261
+ * Prepares common headers for each API request with the JSONL (`application/jsonl`) Content-Type, suitable for streaming.
262
+ * @returns {HeadersInit} A headers object for fetch requests.
263
+ */
264
+ get prepareHeadersJsonL() {
265
+ return {
266
+ ...this.prepareHeaders,
267
+ "Content-Type": "application/jsonl",
268
+ };
269
+ }
253
270
  /**
254
271
  * Handles errors from API requests.
255
272
  * @param {Response} response - The fetch response object.
package/dist/config.d.ts CHANGED
@@ -169,3 +169,11 @@ export interface SpiderParams {
169
169
  */
170
170
  chunking_alg?: ChunkingAlg;
171
171
  }
172
+ export type SpiderCoreResponse = {
173
+ data?: string;
174
+ message?: string;
175
+ error?: string;
176
+ status?: number;
177
+ url?: string;
178
+ };
179
+ export type ChunkCallbackFunction = (data: SpiderCoreResponse) => void;
@@ -0,0 +1,2 @@
1
+ import type { SpiderCoreResponse } from "../config";
2
+ export declare const processChunk: (chunk: string, cb: (r: SpiderCoreResponse) => void) => boolean;
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.processChunk = void 0;
4
+ const processChunk = (chunk, cb) => {
5
+ try {
6
+ cb(chunk ? JSON.parse(chunk.trimEnd()) : null);
7
+ return true;
8
+ }
9
+ catch (_error) {
10
+ return false;
11
+ }
12
+ };
13
+ exports.processChunk = processChunk;
@@ -0,0 +1,2 @@
1
+ import type { ChunkCallbackFunction } from "../config";
2
+ export declare const streamReader: (res: Response, cb: ChunkCallbackFunction) => Promise<void>;
@@ -0,0 +1,29 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.streamReader = void 0;
4
+ const process_chunk_1 = require("./process-chunk");
5
+ // stream the response via callbacks.
6
+ const streamReader = async (res, cb) => {
7
+ var _a;
8
+ if (res.ok) {
9
+ const reader = (_a = res.body) === null || _a === void 0 ? void 0 : _a.getReader();
10
+ const decoder = new TextDecoder();
11
+ let content = "";
12
+ if (reader) {
13
+ while (true) {
14
+ const { done, value } = await reader.read();
15
+ if (done) {
16
+ break;
17
+ }
18
+ content += decoder.decode(value, { stream: true });
19
+ if ((0, process_chunk_1.processChunk)(content, cb)) {
20
+ content = "";
21
+ }
22
+ }
23
+ if (content.length > 0) {
24
+ (0, process_chunk_1.processChunk)(content, cb);
25
+ }
26
+ }
27
+ }
28
+ };
29
+ exports.streamReader = streamReader;
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "name": "@spider-cloud/spider-client",
3
- "version": "0.0.40",
3
+ "version": "0.0.44",
4
4
  "description": "A Javascript SDK for Spider Cloud services",
5
5
  "scripts": {
6
- "test": "jest",
6
+ "test": "node --import tsx --test __tests__/**/*test.ts",
7
7
  "build": "tsc",
8
8
  "prepublishOnly": "npm test && npm run build"
9
9
  },
@@ -23,40 +23,10 @@
23
23
  "author": "Jeff Mendez<jeff@a11ywatch.com>",
24
24
  "license": "MIT",
25
25
  "devDependencies": {
26
- "@jest/globals": "^29.7.0",
27
- "@types/jest": "^29.5.12",
26
+ "@supabase/supabase-js": "^2.44.2",
28
27
  "@types/node": "20.14.2",
29
28
  "dotenv": "^16.4.5",
30
- "ts-jest": "^29.1.5",
31
- "typescript": "5.4.5",
32
- "@supabase/supabase-js": "^2.44.2"
33
- },
34
- "jest": {
35
- "preset": "ts-jest",
36
- "testEnvironment": "node",
37
- "moduleFileExtensions": [
38
- "ts",
39
- "tsx",
40
- "js",
41
- "jsx"
42
- ],
43
- "roots": [
44
- "<rootDir>/src",
45
- "<rootDir>/__tests__"
46
- ],
47
- "transform": {
48
- "^.+\\\\.tsx?$": "ts-jest"
49
- },
50
- "testRegex": "(/__tests__/.*|\\.(test|spec))\\.(ts|tsx)$",
51
- "moduleDirectories": [
52
- "node_modules",
53
- "src"
54
- ],
55
- "collectCoverage": true,
56
- "coverageDirectory": "coverage",
57
- "coverageReporters": [
58
- "text",
59
- "lcov"
60
- ]
29
+ "tsx": "^4.16.2",
30
+ "typescript": "5.4.5"
61
31
  }
62
32
  }