@xcrap/got-scraping-client 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index.mjs → index.js} +1 -0
- package/package.json +11 -10
- package/dist/index.cjs +0 -98
- package/jest.config.ts +0 -5
- package/rollup.config.js +0 -46
- package/tsconfig.json +0 -15
package/package.json
CHANGED
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xcrap/got-scraping-client",
|
|
3
|
-
"version": "0.0.3",
|
|
4
|
-
"description": "
|
|
5
|
-
"main": "index.js",
|
|
3
|
+
"version": "0.0.4",
|
|
4
|
+
"description": "Xcrap Got Scraping Client is a package of the Xcrap framework that implements an HTTP client using the Got Scraping library.",
|
|
5
|
+
"main": "./dist/index.js",
|
|
6
|
+
"module": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"files": [
|
|
9
|
+
"dist/*",
|
|
10
|
+
"!/**/__tests__"
|
|
11
|
+
],
|
|
6
12
|
"scripts": {
|
|
7
13
|
"test": "jest",
|
|
8
|
-
"build": "
|
|
14
|
+
"build": "tsc"
|
|
9
15
|
},
|
|
10
16
|
"keywords": [
|
|
11
17
|
"web scraping",
|
|
@@ -13,18 +19,13 @@
|
|
|
13
19
|
"http client",
|
|
14
20
|
"got scraping"
|
|
15
21
|
],
|
|
16
|
-
"author": "
|
|
22
|
+
"author": "Marcuth",
|
|
17
23
|
"license": "MIT",
|
|
18
24
|
"type": "commonjs",
|
|
19
25
|
"devDependencies": {
|
|
20
|
-
"@rollup/plugin-commonjs": "^28.0.3",
|
|
21
|
-
"@rollup/plugin-node-resolve": "^16.0.1",
|
|
22
|
-
"@rollup/plugin-typescript": "^12.1.2",
|
|
23
26
|
"@types/node": "^22.13.17",
|
|
24
27
|
"header-generator": "^2.1.63",
|
|
25
|
-
"rollup": "^4.39.0",
|
|
26
28
|
"ts-node": "^10.9.2",
|
|
27
|
-
"tslib": "^2.8.1",
|
|
28
29
|
"typescript": "^5.8.2"
|
|
29
30
|
},
|
|
30
31
|
"dependencies": {
|
package/dist/index.cjs
DELETED
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
-
exports.GotScrapingClient = void 0;
|
|
5
|
-
const core_1 = require("@xcrap/core");
|
|
6
|
-
const load_esm_1 = require("load-esm");
|
|
7
|
-
class GotScrapingClient extends core_1.BaseClient {
|
|
8
|
-
constructor(options = {}) {
|
|
9
|
-
super(options);
|
|
10
|
-
this.options = options;
|
|
11
|
-
}
|
|
12
|
-
async initGotScraping() {
|
|
13
|
-
var _a;
|
|
14
|
-
const module = await (0, load_esm_1.loadEsm)("got-scraping");
|
|
15
|
-
const gotScraping = module.gotScraping;
|
|
16
|
-
this.gotScrapingInstance = gotScraping.extend({
|
|
17
|
-
...this.options,
|
|
18
|
-
headers: {
|
|
19
|
-
"User-Agent": (_a = this.currentUserAgent) !== null && _a !== void 0 ? _a : core_1.defaultUserAgent
|
|
20
|
-
},
|
|
21
|
-
proxyUrl: this.currentProxy,
|
|
22
|
-
});
|
|
23
|
-
}
|
|
24
|
-
async ensureGotScraping() {
|
|
25
|
-
if (!this.gotScrapingInstance) {
|
|
26
|
-
await this.initGotScraping();
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
async fetch({ url, maxRetries = 0, retries = 0, retryDelay, method = "GET", ...gotOptions }) {
|
|
30
|
-
await this.ensureGotScraping();
|
|
31
|
-
const failedAttempts = [];
|
|
32
|
-
const attemptRequest = async (currentRetry) => {
|
|
33
|
-
var _a, _b, _c, _d, _e;
|
|
34
|
-
try {
|
|
35
|
-
const fullUrl = this.currentProxyUrl ? `${this.currentProxyUrl}${url}` : url;
|
|
36
|
-
const response = await this.gotScrapingInstance({
|
|
37
|
-
url: fullUrl,
|
|
38
|
-
method: method,
|
|
39
|
-
headers: {
|
|
40
|
-
"User-Agent": (_a = this.currentUserAgent) !== null && _a !== void 0 ? _a : core_1.defaultUserAgent,
|
|
41
|
-
...gotOptions.headers
|
|
42
|
-
},
|
|
43
|
-
proxyUrl: this.currentProxy,
|
|
44
|
-
...gotOptions
|
|
45
|
-
});
|
|
46
|
-
if (!this.isSuccess(response.statusCode)) {
|
|
47
|
-
throw new core_1.InvalidStatusCodeError(response.statusCode);
|
|
48
|
-
}
|
|
49
|
-
return new core_1.HttpResponse({
|
|
50
|
-
status: response.statusCode,
|
|
51
|
-
statusText: response.statusMessage || "OK",
|
|
52
|
-
headers: response.headers,
|
|
53
|
-
body: response.body,
|
|
54
|
-
attempts: currentRetry + 1,
|
|
55
|
-
failedAttempts,
|
|
56
|
-
});
|
|
57
|
-
}
|
|
58
|
-
catch (error) {
|
|
59
|
-
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
60
|
-
failedAttempts.push({ error: errorMessage, timestamp: new Date() });
|
|
61
|
-
if (error.response && currentRetry < maxRetries) {
|
|
62
|
-
if (retryDelay !== undefined && retryDelay > 0) {
|
|
63
|
-
await (0, core_1.delay)(retryDelay);
|
|
64
|
-
}
|
|
65
|
-
return await attemptRequest(currentRetry + 1);
|
|
66
|
-
}
|
|
67
|
-
return new core_1.HttpResponse({
|
|
68
|
-
status: ((_b = error.response) === null || _b === void 0 ? void 0 : _b.statusCode) || 500,
|
|
69
|
-
statusText: ((_c = error.response) === null || _c === void 0 ? void 0 : _c.statusMessage) || "Request Failed",
|
|
70
|
-
body: ((_d = error.response) === null || _d === void 0 ? void 0 : _d.body) || errorMessage,
|
|
71
|
-
headers: ((_e = error.response) === null || _e === void 0 ? void 0 : _e.headers) || {},
|
|
72
|
-
attempts: currentRetry + 1,
|
|
73
|
-
failedAttempts,
|
|
74
|
-
});
|
|
75
|
-
}
|
|
76
|
-
};
|
|
77
|
-
return await attemptRequest(retries);
|
|
78
|
-
}
|
|
79
|
-
async fetchMany({ requests, concurrency, requestDelay }) {
|
|
80
|
-
const results = [];
|
|
81
|
-
const executing = [];
|
|
82
|
-
for (let i = 0; i < requests.length; i++) {
|
|
83
|
-
const promise = this.executeRequest({
|
|
84
|
-
request: requests[i],
|
|
85
|
-
index: i,
|
|
86
|
-
requestDelay: requestDelay,
|
|
87
|
-
results: results
|
|
88
|
-
}).then(() => undefined);
|
|
89
|
-
executing.push(promise);
|
|
90
|
-
if (this.shouldThrottle(executing, concurrency)) {
|
|
91
|
-
await this.handleConcurrency(executing);
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
await Promise.all(executing);
|
|
95
|
-
return results;
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
exports.GotScrapingClient = GotScrapingClient;
|
package/jest.config.ts
DELETED
package/rollup.config.js
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
const typescript = require("@rollup/plugin-typescript")
|
|
2
|
-
const resolve = require("@rollup/plugin-node-resolve")
|
|
3
|
-
const commonjs = require("@rollup/plugin-commonjs")
|
|
4
|
-
const path = require("node:path")
|
|
5
|
-
const fs = require("node:fs")
|
|
6
|
-
|
|
7
|
-
function getAllTypeScriptFiles(dir) {
|
|
8
|
-
let files = []
|
|
9
|
-
|
|
10
|
-
fs.readdirSync(dir).forEach(file => {
|
|
11
|
-
const fullPath = path.join(dir, file)
|
|
12
|
-
|
|
13
|
-
if (fs.statSync(fullPath).isDirectory()) {
|
|
14
|
-
files = files.concat(getAllTypeScriptFiles(fullPath))
|
|
15
|
-
} else if (file.endsWith(".ts")) {
|
|
16
|
-
files.push(fullPath)
|
|
17
|
-
}
|
|
18
|
-
})
|
|
19
|
-
|
|
20
|
-
return files
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
const inputFiles = getAllTypeScriptFiles("src")
|
|
24
|
-
|
|
25
|
-
module.exports = [
|
|
26
|
-
{
|
|
27
|
-
input: inputFiles,
|
|
28
|
-
output: {
|
|
29
|
-
dir: "dist",
|
|
30
|
-
format: "cjs",
|
|
31
|
-
entryFileNames: "[name].cjs",
|
|
32
|
-
preserveModules: true,
|
|
33
|
-
},
|
|
34
|
-
plugins: [resolve(), commonjs(), typescript()]
|
|
35
|
-
},
|
|
36
|
-
{
|
|
37
|
-
input: inputFiles,
|
|
38
|
-
output: {
|
|
39
|
-
dir: "dist",
|
|
40
|
-
format: "esm",
|
|
41
|
-
entryFileNames: "[name].mjs",
|
|
42
|
-
preserveModules: true,
|
|
43
|
-
},
|
|
44
|
-
plugins: [resolve(), commonjs(), typescript()]
|
|
45
|
-
}
|
|
46
|
-
]
|
package/tsconfig.json
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"compilerOptions": {
|
|
3
|
-
"target": "es2019",
|
|
4
|
-
"module": "NodeNext",
|
|
5
|
-
"declaration": true,
|
|
6
|
-
"esModuleInterop": true,
|
|
7
|
-
"forceConsistentCasingInFileNames": true,
|
|
8
|
-
"strict": true,
|
|
9
|
-
"skipLibCheck": true,
|
|
10
|
-
"outDir": "./dist",
|
|
11
|
-
"rootDir": "./src"
|
|
12
|
-
},
|
|
13
|
-
"include": ["./src"],
|
|
14
|
-
"exclude": ["./node_modules", "./dist"]
|
|
15
|
-
}
|