@xcrap/got-scraping-client 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -73,7 +73,7 @@ const client = new GotScraingClient({ userAgent: "Mozilla/5.0 (Windows NT 10.0;
73
73
  ```ts
74
74
  function randomUserAgent() {
75
75
  const userAgents = [
76
- "Mozilla/5.0 (iPhone; CPU iPhone OS 9_8_4; like Mac OS X) AppleWebKit/603.37 (KHTML, like Gecko) Chrome/54.0.1244.188 Mobile Safari/601.5", "Mozilla/5.0 (Windows NT 10.3;; en-US) AppleWebKit/537.35 (KHTML, like Gecko) Chrome/47.0.1707.185 Safari/601"
76
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 9_8_4; like Mac OS X) AppleWebKit/603.37 (KHTML, like Gecko) Chrome/54.0.1244.188 Mobile Safari/601.5", "Mozilla/5.0 (Windows NT 10.3;; en-US) AppleWebKit/537.35 (KHTML, like Gecko) Chrome/47.0.1707.185 Safari/601"
77
77
  ]
78
78
 
79
79
  const randomIndex = Math.floor(Math.random() * userAgents.length)
package/dist/index.cjs ADDED
@@ -0,0 +1,98 @@
'use strict';

// CommonJS build of the Got Scraping HTTP client for the Xcrap framework.
Object.defineProperty(exports, "__esModule", { value: true });
exports.GotScrapingClient = void 0;
const core_1 = require("@xcrap/core");
const load_esm_1 = require("load-esm");
/**
 * HTTP client that performs requests through the `got-scraping` library,
 * inheriting retry bookkeeping, user-agent and proxy state from `BaseClient`.
 */
class GotScrapingClient extends core_1.BaseClient {
    /**
     * @param {object} [options] Options forwarded both to `BaseClient` and to
     *     `gotScraping.extend()`. Defaults to an empty object.
     */
    constructor(options = {}) {
        super(options);
        this.options = options;
    }
    /**
     * Lazily loads the ESM-only `got-scraping` package (via `load-esm`, since
     * this build is CommonJS) and creates the shared got instance.
     */
    async initGotScraping() {
        // Renamed from `module`: in CommonJS `const module` shadows the real
        // module object, which is an accident waiting to happen.
        const esm = await (0, load_esm_1.loadEsm)("got-scraping");
        const gotScraping = esm.gotScraping;
        // `!= null` covers both null and undefined (same effect as `??`)
        // without requiring an ES2020 emit target.
        const userAgent = this.currentUserAgent != null ? this.currentUserAgent : core_1.defaultUserAgent;
        this.gotScrapingInstance = gotScraping.extend({
            ...this.options,
            headers: {
                "User-Agent": userAgent
            },
            proxyUrl: this.currentProxy,
        });
    }
    /** Creates the got instance on first use; subsequent calls are no-ops. */
    async ensureGotScraping() {
        if (!this.gotScrapingInstance) {
            await this.initGotScraping();
        }
    }
    /**
     * Performs a single request, retrying up to `maxRetries` times when the
     * failure carries an HTTP response. Never rejects: transport or status
     * failures are reported as an HttpResponse (status 500 when no response
     * exists) together with the accumulated attempt history.
     */
    async fetch({ url, maxRetries = 0, retries = 0, retryDelay, method = "GET", ...gotOptions }) {
        await this.ensureGotScraping();
        const failedAttempts = [];
        const attemptRequest = async (currentRetry) => {
            try {
                // NOTE(review): `currentProxyUrl` (URL prefix) appears distinct
                // from `currentProxy` (got's proxyUrl option); both come from
                // BaseClient — confirm the prefixing behavior is intended.
                const fullUrl = this.currentProxyUrl ? `${this.currentProxyUrl}${url}` : url;
                const userAgent = this.currentUserAgent != null ? this.currentUserAgent : core_1.defaultUserAgent;
                // BUG FIX: spread `gotOptions` FIRST. It was previously spread
                // last, so a caller-supplied `headers` object replaced the merged
                // headers wholesale and silently dropped the User-Agent default.
                const response = await this.gotScrapingInstance({
                    ...gotOptions,
                    url: fullUrl,
                    method: method,
                    // Preserve original precedence: an explicit proxyUrl in the
                    // per-request options wins over the client's current proxy.
                    proxyUrl: gotOptions.proxyUrl !== undefined ? gotOptions.proxyUrl : this.currentProxy,
                    headers: {
                        "User-Agent": userAgent,
                        ...gotOptions.headers
                    },
                });
                if (!this.isSuccess(response.statusCode)) {
                    throw new core_1.InvalidStatusCodeError(response.statusCode);
                }
                return new core_1.HttpResponse({
                    status: response.statusCode,
                    statusText: response.statusMessage || "OK",
                    headers: response.headers,
                    body: response.body,
                    attempts: currentRetry + 1,
                    failedAttempts,
                });
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : "Unknown error";
                failedAttempts.push({ error: errorMessage, timestamp: new Date() });
                // Only retry when the error carries a response (the server
                // answered); pure transport errors fail immediately.
                if (error.response && currentRetry < maxRetries) {
                    if (retryDelay !== undefined && retryDelay > 0) {
                        await (0, core_1.delay)(retryDelay);
                    }
                    return await attemptRequest(currentRetry + 1);
                }
                const errResponse = error.response;
                return new core_1.HttpResponse({
                    status: (errResponse && errResponse.statusCode) || 500,
                    statusText: (errResponse && errResponse.statusMessage) || "Request Failed",
                    body: (errResponse && errResponse.body) || errorMessage,
                    headers: (errResponse && errResponse.headers) || {},
                    attempts: currentRetry + 1,
                    failedAttempts,
                });
            }
        };
        return await attemptRequest(retries);
    }
    /**
     * Runs many requests, throttling via the BaseClient concurrency helpers
     * and applying an optional delay between requests.
     */
    async fetchMany({ requests, concurrency, requestDelay }) {
        const results = [];
        const executing = [];
        for (let i = 0; i < requests.length; i++) {
            const promise = this.executeRequest({
                request: requests[i],
                index: i,
                requestDelay: requestDelay,
                results: results
            }).then(() => undefined);
            executing.push(promise);
            if (this.shouldThrottle(executing, concurrency)) {
                await this.handleConcurrency(executing);
            }
        }
        await Promise.all(executing);
        return results;
    }
}
exports.GotScrapingClient = GotScrapingClient;
package/dist/index.d.ts ADDED
@@ -0,0 +1,15 @@
import { ClientInterface, ClientFetchManyOptions, ClientRequestOptions, BaseClient, BaseClientOptions, HttpResponse } from "@xcrap/core";
import { ExtendedExtendOptions, GotScraping } from "got-scraping";
/** Proxy configuration for this client: a plain proxy URL string. */
export type GotScrapingProxy = string;
/** Per-request options: Xcrap request options merged with got-scraping's extend options. */
export type GotScrapingRequestOptions = ClientRequestOptions & ExtendedExtendOptions;
/** Options for fetching a batch of requests (requests, concurrency, delay). */
export type GotScrapingFetchManyOptions = ClientFetchManyOptions<GotScrapingRequestOptions>;
/** Constructor options: BaseClient options plus got-scraping extend options. */
export type GotScrapingClientOptions = BaseClientOptions<GotScrapingProxy> & ExtendedExtendOptions;
/**
 * HTTP client backed by the `got-scraping` library, implementing the Xcrap
 * {@link ClientInterface} contract on top of {@link BaseClient}.
 */
export declare class GotScrapingClient extends BaseClient<GotScrapingProxy> implements ClientInterface {
    /** Options captured at construction; also forwarded to `gotScraping.extend()`. */
    readonly options: GotScrapingClientOptions;
    /** Shared got instance; `undefined` until lazily initialized on first fetch. */
    protected gotScrapingInstance: GotScraping | undefined;
    constructor(options?: GotScrapingClientOptions);
    /** Loads the ESM-only `got-scraping` module and builds the shared instance. */
    protected initGotScraping(): Promise<void>;
    /** Ensures `gotScrapingInstance` exists before a request is issued. */
    protected ensureGotScraping(): Promise<void>;
    /** Performs one request with retry support; resolves (never rejects) with an HttpResponse. */
    fetch({ url, maxRetries, retries, retryDelay, method, ...gotOptions }: GotScrapingRequestOptions): Promise<HttpResponse>;
    /** Performs many requests with optional concurrency throttling and inter-request delay. */
    fetchMany({ requests, concurrency, requestDelay }: GotScrapingFetchManyOptions): Promise<HttpResponse[]>;
}
package/dist/index.mjs ADDED
@@ -0,0 +1,96 @@
// ESM build of the Got Scraping HTTP client for the Xcrap framework.
//
// BUG FIX: this .mjs file previously shipped CommonJS code (`exports`,
// `require`), which throws "ReferenceError: exports is not defined" the
// moment Node loads it as an ES module — the ESM artifact was unusable.
// It is now genuine ESM with a named export matching the CJS build.
import { BaseClient, HttpResponse, InvalidStatusCodeError, defaultUserAgent, delay } from "@xcrap/core";
/**
 * HTTP client that performs requests through the `got-scraping` library,
 * inheriting retry bookkeeping, user-agent and proxy state from `BaseClient`.
 */
export class GotScrapingClient extends BaseClient {
    /**
     * @param {object} [options] Options forwarded both to `BaseClient` and to
     *     `gotScraping.extend()`. Defaults to an empty object.
     */
    constructor(options = {}) {
        super(options);
        this.options = options;
    }
    /**
     * Lazily loads `got-scraping` and creates the shared got instance.
     * got-scraping is ESM-only, so a native dynamic import works here — the
     * `load-esm` shim is only needed by the CommonJS build.
     */
    async initGotScraping() {
        const { gotScraping } = await import("got-scraping");
        this.gotScrapingInstance = gotScraping.extend({
            ...this.options,
            headers: {
                "User-Agent": this.currentUserAgent ?? defaultUserAgent
            },
            proxyUrl: this.currentProxy,
        });
    }
    /** Creates the got instance on first use; subsequent calls are no-ops. */
    async ensureGotScraping() {
        if (!this.gotScrapingInstance) {
            await this.initGotScraping();
        }
    }
    /**
     * Performs a single request, retrying up to `maxRetries` times when the
     * failure carries an HTTP response. Never rejects: transport or status
     * failures are reported as an HttpResponse (status 500 when no response
     * exists) together with the accumulated attempt history.
     */
    async fetch({ url, maxRetries = 0, retries = 0, retryDelay, method = "GET", ...gotOptions }) {
        await this.ensureGotScraping();
        const failedAttempts = [];
        const attemptRequest = async (currentRetry) => {
            try {
                // NOTE(review): `currentProxyUrl` (URL prefix) appears distinct
                // from `currentProxy` (got's proxyUrl option); both come from
                // BaseClient — confirm the prefixing behavior is intended.
                const fullUrl = this.currentProxyUrl ? `${this.currentProxyUrl}${url}` : url;
                // BUG FIX: spread `gotOptions` FIRST. It was previously spread
                // last, so a caller-supplied `headers` object replaced the merged
                // headers wholesale and silently dropped the User-Agent default.
                const response = await this.gotScrapingInstance({
                    ...gotOptions,
                    url: fullUrl,
                    method,
                    // Preserve original precedence: an explicit proxyUrl in the
                    // per-request options wins over the client's current proxy.
                    proxyUrl: gotOptions.proxyUrl ?? this.currentProxy,
                    headers: {
                        "User-Agent": this.currentUserAgent ?? defaultUserAgent,
                        ...gotOptions.headers
                    },
                });
                if (!this.isSuccess(response.statusCode)) {
                    throw new InvalidStatusCodeError(response.statusCode);
                }
                return new HttpResponse({
                    status: response.statusCode,
                    statusText: response.statusMessage || "OK",
                    headers: response.headers,
                    body: response.body,
                    attempts: currentRetry + 1,
                    failedAttempts,
                });
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : "Unknown error";
                failedAttempts.push({ error: errorMessage, timestamp: new Date() });
                // Only retry when the error carries a response (the server
                // answered); pure transport errors fail immediately.
                if (error.response && currentRetry < maxRetries) {
                    if (retryDelay !== undefined && retryDelay > 0) {
                        await delay(retryDelay);
                    }
                    return await attemptRequest(currentRetry + 1);
                }
                return new HttpResponse({
                    status: error.response?.statusCode || 500,
                    statusText: error.response?.statusMessage || "Request Failed",
                    body: error.response?.body || errorMessage,
                    headers: error.response?.headers || {},
                    attempts: currentRetry + 1,
                    failedAttempts,
                });
            }
        };
        return await attemptRequest(retries);
    }
    /**
     * Runs many requests, throttling via the BaseClient concurrency helpers
     * and applying an optional delay between requests.
     */
    async fetchMany({ requests, concurrency, requestDelay }) {
        const results = [];
        const executing = [];
        for (let i = 0; i < requests.length; i++) {
            const promise = this.executeRequest({
                request: requests[i],
                index: i,
                requestDelay: requestDelay,
                results: results
            }).then(() => undefined);
            executing.push(promise);
            if (this.shouldThrottle(executing, concurrency)) {
                await this.handleConcurrency(executing);
            }
        }
        await Promise.all(executing);
        return results;
    }
}
package/package.json CHANGED
@@ -1,14 +1,20 @@
1
1
  {
2
2
  "name": "@xcrap/got-scraping-client",
3
- "version": "0.0.1",
4
- "description": "",
3
+ "version": "0.0.3",
4
+ "description": "**Xcrap Got Scraping Client** is a package of the Xcrap framework that implements an HTTP client using the [Got Scraping](https://www.npmjs.com/package/got-scraping) library.",
5
5
  "main": "index.js",
6
6
  "scripts": {
7
- "test": "echo \"Error: no test specified\" && exit 1"
7
+ "test": "jest",
8
+ "build": "rollup -c"
8
9
  },
9
- "keywords": [],
10
- "author": "",
11
- "license": "ISC",
10
+ "keywords": [
11
+ "web scraping",
12
+ "xcrap",
13
+ "http client",
14
+ "got scraping"
15
+ ],
16
+ "author": "Marcuthj",
17
+ "license": "MIT",
12
18
  "type": "commonjs",
13
19
  "devDependencies": {
14
20
  "@rollup/plugin-commonjs": "^28.0.3",
@@ -28,5 +34,13 @@
28
34
  "peerDependencies": {
29
35
  "@xcrap/core": "^0.0.3",
30
36
  "@xcrap/parser": "^0.0.2"
31
- }
37
+ },
38
+ "repository": {
39
+ "type": "git",
40
+ "url": "git+https://github.com/Xcrap-Cloud/got-scraping-client.git"
41
+ },
42
+ "bugs": {
43
+ "url": "https://github.com/Xcrap-Cloud/got-scraping-client/issues"
44
+ },
45
+ "homepage": "https://github.com/Xcrap-Cloud/got-scraping-client#readme"
32
46
  }