@spider-cloud/spider-client 0.0.1

package/LICENSE ADDED
@@ -0,0 +1 @@
+ ../LICENSE
package/README.md ADDED
@@ -0,0 +1,82 @@
+ # Spider Cloud JavaScript SDK
+
+ The Spider Cloud JavaScript SDK offers a streamlined set of tools for web scraping and crawling, with data extraction comprehensive enough to feed AI language models. This SDK makes it easy to interact programmatically with the Spider Cloud API from any JavaScript or Node.js application.
+
+ ## Installation
+
+ You can install the Spider Cloud JavaScript SDK via npm:
+
+ ```bash
+ npm install @spider-cloud/spider-client
+ ```
+
+ Or with yarn:
+
+ ```bash
+ yarn add @spider-cloud/spider-client
+ ```
+
+ ## Configuration
+
+ Before using the SDK, you will need to provide it with your API key. Obtain an API key from [spider.cloud](https://spider.cloud) and either pass it directly to the constructor or set it in the `SPIDER_API_KEY` environment variable.
+
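With the key exported in the environment, the constructor can be called with no arguments — a minimal sketch of the fallback behavior the constructor implements (the shell export shown in the comment is an illustrative value):

```javascript
import Spider from "@spider-cloud/spider-client";

// Assumes the key was exported in the shell beforehand, e.g.
//   export SPIDER_API_KEY="your_api_key"
// The constructor falls back to process.env.SPIDER_API_KEY and
// throws "No API key provided" if neither source is set.
const app = new Spider();
```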
+ ## Usage
+
+ Here's a basic example to demonstrate how to use the SDK:
+
+ ```javascript
+ import Spider from "@spider-cloud/spider-client";
+
+ // Initialize the SDK with your API key
+ const app = new Spider("your_api_key");
+
+ // Scrape a URL
+ const url = "https://spiderwebai.xyz";
+ app
+   .scrapeUrl(url)
+   .then((data) => {
+     console.log("Scraped Data:", data);
+   })
+   .catch((error) => {
+     console.error("Scrape Error:", error);
+   });
+
+ // Crawl a website
+ const crawlParams = {
+   limit: 5,
+   proxy_enabled: true,
+   store_data: false,
+   metadata: false,
+   request: "http",
+ };
+ app
+   .crawlUrl(url, crawlParams)
+   .then((result) => {
+     console.log("Crawl Result:", result);
+   })
+   .catch((error) => {
+     console.error("Crawl Error:", error);
+   });
+ ```
+
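The keys in `crawlParams` are merged with the `url` and sent verbatim as the JSON body of the `POST` to the crawl endpoint (see `crawlUrl` in the implementation below), so any parameter the API accepts can be supplied this way.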
+ ### Available Methods
+
+ - **`scrapeUrl(url, params)`**: Scrape data from a specified URL. Optional parameters can be passed to customize the scraping behavior.
+ - **`crawlUrl(url, params, stream)`**: Begin crawling from a specific URL, with optional parameters for customization and an optional streaming response (see the sketch after this list).
+ - **`links(url, params)`**: Retrieve all links from the specified URL with optional parameters.
+ - **`screenshot(url, params)`**: Take a screenshot of the specified URL.
+ - **`extractContacts(url, params)`**: Extract contact information from the specified URL.
+ - **`label(url, params)`**: Apply labeling to data extracted from the specified URL.
+ - **`getCredits()`**: Retrieve the account's remaining credits.
+
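When `stream` is `true`, `crawlUrl` resolves to the raw `fetch` `Response` instead of parsed JSON. A minimal sketch of consuming it chunk by chunk — the wire format of the streamed body is not documented in this diff, so the chunks are simply decoded as text:

```javascript
// Inside an async function: pass `true` as the third argument
// to get the raw Response back instead of parsed JSON.
const response = await app.crawlUrl(url, crawlParams, true);
const reader = response.body.getReader();
const decoder = new TextDecoder();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  console.log(decoder.decode(value, { stream: true }));
}
```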
+ ## Error Handling
+
+ The SDK throws an error whenever a request fails, with the attempted action and the HTTP status code included in the message. Always use `.catch()` on promises, or `try`/`catch` with `async`/`await`, to handle these errors gracefully.
+
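The same scrape with `async`/`await` error handling — a sketch, where `main` is just an illustrative wrapper name:

```javascript
async function main() {
  try {
    const data = await app.scrapeUrl("https://spiderwebai.xyz");
    console.log("Scraped Data:", data);
  } catch (error) {
    // The message carries the failed action and HTTP status code.
    console.error("Scrape Error:", error);
  }
}

main();
```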
+ ## Contributing
+
+ Contributions are always welcome! Feel free to open an issue or submit a pull request on our GitHub repository.
+
+ ## License
+
+ The Spider Cloud JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
package/dist/spiderwebai.d.ts ADDED
@@ -0,0 +1,90 @@
+ /// <reference types="node" />
+ /**
+  * A class to interact with the SpiderWeb AI API.
+  */
+ export default class Spider {
+     private apiKey?;
+     /**
+      * Create an instance of Spider.
+      * @param {string | null} apiKey - The API key used to authenticate to the SpiderWeb AI API. If null, attempts to source from environment variables.
+      * @throws Will throw an error if the API key is not provided.
+      */
+     constructor(apiKey?: string);
+     /**
+      * Internal method to handle POST requests.
+      * @param {string} endpoint - The API endpoint to which the POST request should be sent.
+      * @param {Record<string, any>} data - The JSON data to be sent in the request body.
+      * @param {boolean} [stream=false] - Whether to stream the response back without parsing.
+      * @returns {Promise<Response | any>} The response in JSON if not streamed, or the Response object if streamed.
+      */
+     private _apiPost;
+     /**
+      * Internal method to handle GET requests.
+      * @param {string} endpoint - The API endpoint from which data should be retrieved.
+      * @returns {Promise<any>} The data returned from the endpoint in JSON format.
+      */
+     private _apiGet;
+     /**
+      * Scrapes data from a specified URL.
+      * @param {string} url - The URL to scrape.
+      * @param {object} [params={}] - Additional parameters for the scraping request.
+      * @returns {Promise<any>} The scraped data from the URL.
+      */
+     scrapeUrl(url: string, params?: {}): Promise<unknown>;
+     /**
+      * Initiates a crawling job starting from the specified URL.
+      * @param {string} url - The URL to start crawling.
+      * @param {object} [params={}] - Additional parameters for the crawl.
+      * @param {boolean} [stream=false] - Whether to receive the response as a stream.
+      * @returns {Promise<any | Response>} The result of the crawl, either structured data or a Response object if streaming.
+      */
+     crawlUrl(url: string, params?: {}, stream?: boolean): Promise<unknown>;
+     /**
+      * Retrieves all links from the specified URL.
+      * @param {string} url - The URL from which to gather links.
+      * @param {object} [params={}] - Additional parameters for the request.
+      * @returns {Promise<any>} A list of links extracted from the URL.
+      */
+     links(url: string, params?: {}): Promise<unknown>;
+     /**
+      * Takes a screenshot of the specified URL.
+      * @param {string} url - The URL to screenshot.
+      * @param {object} [params={}] - Configuration parameters for the screenshot.
+      * @returns {Promise<any>} The screenshot data.
+      */
+     screenshot(url: string, params?: {}): Promise<unknown>;
+     /**
+      * Extracts contact information from the specified URL.
+      * @param {string} url - The URL from which to extract contacts.
+      * @param {object} [params={}] - Configuration parameters for the extraction.
+      * @returns {Promise<any>} The contact information extracted.
+      */
+     extractContacts(url: string, params?: {}): Promise<unknown>;
+     /**
+      * Applies labeling to data extracted from a specified URL.
+      * @param {string} url - The URL to label.
+      * @param {object} [params={}] - Configuration parameters for labeling.
+      * @returns {Promise<any>} The labeled data.
+      */
+     label(url: string, params?: {}): Promise<unknown>;
+     /**
+      * Retrieves the number of credits available on the account.
+      * @returns {Promise<any>} The current credit balance.
+      */
+     getCredits(): Promise<unknown>;
+     /**
+      * Prepares common headers for each API request.
+      * @returns {HeadersInit} A headers object for fetch requests.
+      */
+     prepareHeaders(): {
+         "Content-Type": string;
+         Authorization: string;
+     };
+     /**
+      * Handles errors from API requests.
+      * @param {Response} response - The fetch response object.
+      * @param {string} action - Description of the attempted action.
+      * @throws Will throw an error with detailed status information.
+      */
+     handleError(response: Response, action: string): void;
+ }
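Every public method is declared to return `Promise<unknown>`, so callers narrow the result themselves before using it. A minimal sketch — the actual response shape is not documented in this diff, so the array check below is an assumption:

```javascript
// Inside an async function: narrow the unknown result at runtime.
const data = await app.scrapeUrl(url);
if (Array.isArray(data)) {
  // Assumed shape: an array of page results.
  data.forEach((page) => console.log(page));
}
```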
package/dist/spiderwebai.js ADDED
@@ -0,0 +1,142 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ /**
+  * A class to interact with the SpiderWeb AI API.
+  */
+ class Spider {
+     /**
+      * Create an instance of Spider.
+      * @param {string | null} apiKey - The API key used to authenticate to the SpiderWeb AI API. If null, attempts to source from environment variables.
+      * @throws Will throw an error if the API key is not provided.
+      */
+     constructor(apiKey) {
+         this.apiKey = apiKey || process?.env?.SPIDER_API_KEY;
+         if (!this.apiKey) {
+             throw new Error("No API key provided");
+         }
+     }
+     /**
+      * Internal method to handle POST requests.
+      * @param {string} endpoint - The API endpoint to which the POST request should be sent.
+      * @param {Record<string, any>} data - The JSON data to be sent in the request body.
+      * @param {boolean} [stream=false] - Whether to stream the response back without parsing.
+      * @returns {Promise<Response | any>} The response in JSON if not streamed, or the Response object if streamed.
+      */
+     async _apiPost(endpoint, data, stream = false) {
+         const headers = this.prepareHeaders();
+         const response = await fetch(`https://spider.a11ywatch.com/v1/${endpoint}`, {
+             method: "POST",
+             headers: headers,
+             body: JSON.stringify(data),
+         });
+         if (!stream) {
+             if (response.ok) {
+                 return response.json();
+             }
+             else {
+                 this.handleError(response, `post to ${endpoint}`);
+             }
+         }
+         return response;
+     }
+     /**
+      * Internal method to handle GET requests.
+      * @param {string} endpoint - The API endpoint from which data should be retrieved.
+      * @returns {Promise<any>} The data returned from the endpoint in JSON format.
+      */
+     async _apiGet(endpoint) {
+         const headers = this.prepareHeaders();
+         const response = await fetch(`https://spider.a11ywatch.com/v1/${endpoint}`, {
+             method: "GET",
+             headers: headers,
+         });
+         if (response.ok) {
+             return response.json();
+         }
+         else {
+             this.handleError(response, `get from ${endpoint}`);
+         }
+     }
+     /**
+      * Scrapes data from a specified URL.
+      * @param {string} url - The URL to scrape.
+      * @param {object} [params={}] - Additional parameters for the scraping request.
+      * @returns {Promise<any>} The scraped data from the URL.
+      */
+     async scrapeUrl(url, params = {}) {
+         return this._apiPost("crawl", { url: url, budget: '{"*":1}', ...params });
+     }
+     /**
+      * Initiates a crawling job starting from the specified URL.
+      * @param {string} url - The URL to start crawling.
+      * @param {object} [params={}] - Additional parameters for the crawl.
+      * @param {boolean} [stream=false] - Whether to receive the response as a stream.
+      * @returns {Promise<any | Response>} The result of the crawl, either structured data or a Response object if streaming.
+      */
+     async crawlUrl(url, params = {}, stream = false) {
+         return this._apiPost("crawl", { url: url, ...params }, stream);
+     }
+     /**
+      * Retrieves all links from the specified URL.
+      * @param {string} url - The URL from which to gather links.
+      * @param {object} [params={}] - Additional parameters for the request.
+      * @returns {Promise<any>} A list of links extracted from the URL.
+      */
+     async links(url, params = {}) {
+         return this._apiPost("links", { url: url, ...params });
+     }
+     /**
+      * Takes a screenshot of the specified URL.
+      * @param {string} url - The URL to screenshot.
+      * @param {object} [params={}] - Configuration parameters for the screenshot.
+      * @returns {Promise<any>} The screenshot data.
+      */
+     async screenshot(url, params = {}) {
+         return this._apiPost("screenshot", { url: url, ...params });
+     }
+     /**
+      * Extracts contact information from the specified URL.
+      * @param {string} url - The URL from which to extract contacts.
+      * @param {object} [params={}] - Configuration parameters for the extraction.
+      * @returns {Promise<any>} The contact information extracted.
+      */
+     async extractContacts(url, params = {}) {
+         return this._apiPost("pipeline/extract-contacts", { url: url, ...params });
+     }
+     /**
+      * Applies labeling to data extracted from a specified URL.
+      * @param {string} url - The URL to label.
+      * @param {object} [params={}] - Configuration parameters for labeling.
+      * @returns {Promise<any>} The labeled data.
+      */
+     async label(url, params = {}) {
+         return this._apiPost("pipeline/label", { url: url, ...params });
+     }
+     /**
+      * Retrieves the number of credits available on the account.
+      * @returns {Promise<any>} The current credit balance.
+      */
+     async getCredits() {
+         return this._apiGet("credits");
+     }
+     /**
+      * Prepares common headers for each API request.
+      * @returns {HeadersInit} A headers object for fetch requests.
+      */
+     prepareHeaders() {
+         return {
+             "Content-Type": "application/json",
+             Authorization: `Bearer ${this.apiKey}`,
+         };
+     }
+     /**
+      * Handles errors from API requests.
+      * @param {Response} response - The fetch response object.
+      * @param {string} action - Description of the attempted action.
+      * @throws Will throw an error with detailed status information.
+      */
+     handleError(response, action) {
+         throw new Error(`Failed to ${action}. Status code: ${response.status}.`);
+     }
+ }
+ exports.default = Spider;
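Note that `scrapeUrl` reuses the same `crawl` endpoint, defaulting `budget` to `'{"*":1}'` so the crawl is limited to a single page. Because `params` is spread after that default, a caller-supplied budget replaces it — a sketch, where the wider budget value is a hypothetical example:

```javascript
// The caller's budget overrides the single-page default because
// ...params is spread last, so this behaves like a small crawl.
app.scrapeUrl("https://spiderwebai.xyz", { budget: '{"*":3}' });
```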
package/package.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "name": "@spider-cloud/spider-client",
+   "version": "0.0.1",
+   "description": "A JavaScript SDK for Spider Cloud services",
+   "private": false,
+   "scripts": {
+     "test": "jest",
+     "build": "tsc",
+     "prepublishOnly": "npm test && npm run build"
+   },
+   "main": "dist/spiderwebai.js",
+   "types": "dist/spiderwebai.d.ts",
+   "files": [
+     "dist/**/*"
+   ],
+   "keywords": [
+     "spiderwebai",
+     "sdk",
+     "web scraping",
+     "api"
+   ],
+   "author": "Jeff Mendez <jeff@a11ywatch.com>",
+   "license": "MIT",
+   "devDependencies": {
+     "@jest/globals": "^29.7.0",
+     "@types/jest": "^29.5.12",
+     "@types/node": "20.12.7",
+     "ts-jest": "^29.1.2",
+     "typescript": "5.4.5"
+   },
+   "jest": {
+     "preset": "ts-jest",
+     "testEnvironment": "node",
+     "moduleFileExtensions": [
+       "ts",
+       "tsx",
+       "js",
+       "jsx"
+     ],
+     "roots": [
+       "<rootDir>/src",
+       "<rootDir>/__tests__"
+     ],
+     "transform": {
+       "^.+\\.tsx?$": "ts-jest"
+     },
+     "testRegex": "(/__tests__/.*|\\.(test|spec))\\.(ts|tsx)$",
+     "moduleDirectories": [
+       "node_modules",
+       "src"
+     ],
+     "collectCoverage": true,
+     "coverageDirectory": "coverage",
+     "coverageReporters": [
+       "text",
+       "lcov"
+     ]
+   }
+ }