@xcrap/got-scraping-client 0.0.1 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -73,7 +73,7 @@ const client = new GotScrapingClient({ userAgent: "Mozilla/5.0 (Windows NT 10.0;
73
73
  ```ts
74
74
  function randomUserAgent() {
75
75
  const userAgents = [
76
- "Mozilla/5.0 (iPhone; CPU iPhone OS 9_8_4; like Mac OS X) AppleWebKit/603.37 (KHTML, like Gecko) Chrome/54.0.1244.188 Mobile Safari/601.5", "Mozilla/5.0 (Windows NT 10.3;; en-US) AppleWebKit/537.35 (KHTML, like Gecko) Chrome/47.0.1707.185 Safari/601"
76
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 9_8_4; like Mac OS X) AppleWebKit/603.37 (KHTML, like Gecko) Chrome/54.0.1244.188 Mobile Safari/601.5", "Mozilla/5.0 (Windows NT 10.3;; en-US) AppleWebKit/537.35 (KHTML, like Gecko) Chrome/47.0.1707.185 Safari/601"
77
77
  ]
78
78
 
79
79
  const randomIndex = Math.floor(Math.random() * userAgents.length)
@@ -0,0 +1,15 @@
1
import { ClientInterface, ClientFetchManyOptions, ClientRequestOptions, BaseClient, BaseClientOptions, HttpResponse } from "@xcrap/core";
import { ExtendedExtendOptions, GotScraping } from "got-scraping";

/** Proxy configuration for this client — a single proxy URL string. */
export type GotScrapingProxy = string;

/** Per-request options: the core request options plus any got-scraping extend options. */
export type GotScrapingRequestOptions = ClientRequestOptions & ExtendedExtendOptions;

/** Batch-fetch options parameterized over this client's request-option type. */
export type GotScrapingFetchManyOptions = ClientFetchManyOptions<GotScrapingRequestOptions>;

/** Constructor options: base client options (proxy, user agent, …) plus got-scraping extend options. */
export type GotScrapingClientOptions = BaseClientOptions<GotScrapingProxy> & ExtendedExtendOptions;

/**
 * HTTP client implementation backed by the got-scraping library.
 * The got-scraping module is ESM-only, so the instance is loaded lazily
 * on first use (see ensureGotScraping).
 */
export declare class GotScrapingClient extends BaseClient<GotScrapingProxy> implements ClientInterface {
    /** Options captured at construction time; reused when the got-scraping instance is built. */
    readonly options: GotScrapingClientOptions;
    /** Lazily created got-scraping instance; undefined until the first fetch. */
    protected gotScrapingInstance: GotScraping | undefined;
    constructor(options?: GotScrapingClientOptions);
    /** Dynamically imports got-scraping (ESM-only) and builds the shared instance. */
    protected initGotScraping(): Promise<void>;
    /** Initializes the got-scraping instance if it has not been created yet. */
    protected ensureGotScraping(): Promise<void>;
    /** Performs a single request with optional retries; never rejects — failures are encoded in the HttpResponse. */
    fetch({ url, maxRetries, retries, retryDelay, method, ...gotOptions }: GotScrapingRequestOptions): Promise<HttpResponse>;
    /** Performs many requests with optional concurrency throttling and inter-request delay. */
    fetchMany({ requests, concurrency, requestDelay }: GotScrapingFetchManyOptions): Promise<HttpResponse[]>;
}
package/dist/index.js ADDED
@@ -0,0 +1,97 @@
1
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.GotScrapingClient = void 0;
const core_1 = require("@xcrap/core");
const load_esm_1 = require("load-esm");
/**
 * HTTP client backed by the got-scraping library.
 *
 * got-scraping is ESM-only, so it is loaded dynamically (via load-esm) the
 * first time a request is made, and the configured instance is cached on
 * `gotScrapingInstance`.
 */
class GotScrapingClient extends core_1.BaseClient {
    /**
     * @param {object} [options] Base client options (proxy, userAgent, ...) plus
     *                           got-scraping extend options; kept for the lazy init.
     */
    constructor(options = {}) {
        super(options);
        this.options = options;
    }
    /** Dynamically imports got-scraping and builds the shared, extended instance. */
    async initGotScraping() {
        const module = await (0, load_esm_1.loadEsm)("got-scraping");
        const gotScraping = module.gotScraping;
        this.gotScrapingInstance = gotScraping.extend({
            ...this.options,
            headers: {
                // Default User-Agent; any headers supplied in the constructor
                // options take precedence over it.
                "User-Agent": this.currentUserAgent ?? core_1.defaultUserAgent,
                ...this.options.headers,
            },
            proxyUrl: this.currentProxy,
        });
    }
    /** Ensures the got-scraping instance exists before it is used. */
    async ensureGotScraping() {
        if (!this.gotScrapingInstance) {
            await this.initGotScraping();
        }
    }
    /**
     * Performs a single request with optional retries.
     *
     * Never rejects: on failure (after exhausting retries) an HttpResponse is
     * returned whose status/body come from the error response when available,
     * or 500 + the error message otherwise. Each failed attempt is recorded in
     * `failedAttempts` on the returned HttpResponse.
     */
    async fetch({ url, maxRetries = 0, retries = 0, retryDelay, method = "GET", ...gotOptions }) {
        await this.ensureGotScraping();
        const failedAttempts = [];
        const attemptRequest = async (currentRetry) => {
            try {
                // NOTE(review): currentProxyUrl is used as a URL *prefix* (proxy-gateway
                // style), while currentProxy is passed as got-scraping's proxyUrl —
                // confirm both properties are intended to coexist in BaseClient.
                const fullUrl = this.currentProxyUrl ? `${this.currentProxyUrl}${url}` : url;
                const response = await this.gotScrapingInstance({
                    // Spread caller options FIRST so the explicit url/method/headers/
                    // proxyUrl below cannot be clobbered. Previously `...gotOptions`
                    // was spread last, so a caller-supplied `headers` object replaced
                    // the merged headers and silently dropped the User-Agent default.
                    ...gotOptions,
                    url: fullUrl,
                    method: method,
                    headers: {
                        "User-Agent": this.currentUserAgent ?? core_1.defaultUserAgent,
                        ...gotOptions.headers,
                    },
                    proxyUrl: this.currentProxy,
                });
                if (!this.isSuccess(response.statusCode)) {
                    throw new core_1.InvalidStatusCodeError(response.statusCode);
                }
                return new core_1.HttpResponse({
                    status: response.statusCode,
                    statusText: response.statusMessage || "OK",
                    headers: response.headers,
                    body: response.body,
                    attempts: currentRetry + 1,
                    failedAttempts,
                });
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : "Unknown error";
                failedAttempts.push({ error: errorMessage, timestamp: new Date() });
                // Only retry when the server actually answered (error.response set);
                // pure network errors are reported immediately.
                if (error.response && currentRetry < maxRetries) {
                    if (retryDelay !== undefined && retryDelay > 0) {
                        await (0, core_1.delay)(retryDelay);
                    }
                    return await attemptRequest(currentRetry + 1);
                }
                return new core_1.HttpResponse({
                    status: error.response?.statusCode || 500,
                    statusText: error.response?.statusMessage || "Request Failed",
                    body: error.response?.body || errorMessage,
                    headers: error.response?.headers || {},
                    attempts: currentRetry + 1,
                    failedAttempts,
                });
            }
        };
        // `retries` seeds the attempt counter (presumably attempts already spent
        // by a caller); `maxRetries` is the ceiling.
        return await attemptRequest(retries);
    }
    /**
     * Performs all `requests`, at most `concurrency` in flight at once, with an
     * optional `requestDelay` between starts. Results are collected by index.
     */
    async fetchMany({ requests, concurrency, requestDelay }) {
        const results = [];
        const executing = [];
        for (let i = 0; i < requests.length; i++) {
            const promise = this.executeRequest({
                request: requests[i],
                index: i,
                requestDelay: requestDelay,
                results: results,
            }).then(() => undefined);
            executing.push(promise);
            if (this.shouldThrottle(executing, concurrency)) {
                await this.handleConcurrency(executing);
            }
        }
        await Promise.all(executing);
        return results;
    }
}
exports.GotScrapingClient = GotScrapingClient;
package/package.json CHANGED
@@ -1,24 +1,31 @@
1
1
  {
2
2
  "name": "@xcrap/got-scraping-client",
3
- "version": "0.0.1",
4
- "description": "",
5
- "main": "index.js",
3
+ "version": "0.0.4",
4
+ "description": "Xcrap Got Scraping Client is a package of the Xcrap framework that implements an HTTP client using the Got Scraping library.",
5
+ "main": "./dist/index.js",
6
+ "module": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "files": [
9
+ "dist/*",
10
+ "!/**/__tests__"
11
+ ],
6
12
  "scripts": {
7
- "test": "echo \"Error: no test specified\" && exit 1"
13
+ "test": "jest",
14
+ "build": "tsc"
8
15
  },
9
- "keywords": [],
10
- "author": "",
11
- "license": "ISC",
16
+ "keywords": [
17
+ "web scraping",
18
+ "xcrap",
19
+ "http client",
20
+ "got scraping"
21
+ ],
22
+ "author": "Marcuth",
23
+ "license": "MIT",
12
24
  "type": "commonjs",
13
25
  "devDependencies": {
14
- "@rollup/plugin-commonjs": "^28.0.3",
15
- "@rollup/plugin-node-resolve": "^16.0.1",
16
- "@rollup/plugin-typescript": "^12.1.2",
17
26
  "@types/node": "^22.13.17",
18
27
  "header-generator": "^2.1.63",
19
- "rollup": "^4.39.0",
20
28
  "ts-node": "^10.9.2",
21
- "tslib": "^2.8.1",
22
29
  "typescript": "^5.8.2"
23
30
  },
24
31
  "dependencies": {
@@ -28,5 +35,13 @@
28
35
  "peerDependencies": {
29
36
  "@xcrap/core": "^0.0.3",
30
37
  "@xcrap/parser": "^0.0.2"
31
- }
38
+ },
39
+ "repository": {
40
+ "type": "git",
41
+ "url": "git+https://github.com/Xcrap-Cloud/got-scraping-client.git"
42
+ },
43
+ "bugs": {
44
+ "url": "https://github.com/Xcrap-Cloud/got-scraping-client/issues"
45
+ },
46
+ "homepage": "https://github.com/Xcrap-Cloud/got-scraping-client#readme"
32
47
  }
package/jest.config.ts DELETED
@@ -1,5 +0,0 @@
1
- export default {
2
- preset: "ts-jest",
3
- testEnvironment: "node",
4
- cache: true
5
- }
package/rollup.config.js DELETED
@@ -1,46 +0,0 @@
1
- const typescript = require("@rollup/plugin-typescript")
2
- const resolve = require("@rollup/plugin-node-resolve")
3
- const commonjs = require("@rollup/plugin-commonjs")
4
- const path = require("node:path")
5
- const fs = require("node:fs")
6
-
7
- function getAllTypeScriptFiles(dir) {
8
- let files = []
9
-
10
- fs.readdirSync(dir).forEach(file => {
11
- const fullPath = path.join(dir, file)
12
-
13
- if (fs.statSync(fullPath).isDirectory()) {
14
- files = files.concat(getAllTypeScriptFiles(fullPath))
15
- } else if (file.endsWith(".ts")) {
16
- files.push(fullPath)
17
- }
18
- })
19
-
20
- return files
21
- }
22
-
23
- const inputFiles = getAllTypeScriptFiles("src")
24
-
25
- module.exports = [
26
- {
27
- input: inputFiles,
28
- output: {
29
- dir: "dist",
30
- format: "cjs",
31
- entryFileNames: "[name].cjs",
32
- preserveModules: true,
33
- },
34
- plugins: [resolve(), commonjs(), typescript()]
35
- },
36
- {
37
- input: inputFiles,
38
- output: {
39
- dir: "dist",
40
- format: "esm",
41
- entryFileNames: "[name].mjs",
42
- preserveModules: true,
43
- },
44
- plugins: [resolve(), commonjs(), typescript()]
45
- }
46
- ]
package/tsconfig.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "es2019",
4
- "module": "NodeNext",
5
- "declaration": true,
6
- "esModuleInterop": true,
7
- "forceConsistentCasingInFileNames": true,
8
- "strict": true,
9
- "skipLibCheck": true,
10
- "outDir": "./dist",
11
- "rootDir": "./src"
12
- },
13
- "include": ["./src"],
14
- "exclude": ["./node_modules", "./dist"]
15
- }