@xcrap/puppeteer-client 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
[file header missing in capture — probably package/dist/constants.js] CHANGED
@@ -1,3 +1,4 @@
1
+ "use strict";
1
2
  Object.defineProperty(exports, "__esModule", { value: true });
2
3
  exports.defaultPuppeteerActionType = void 0;
3
4
  exports.defaultPuppeteerActionType = "afterRequest";
package/dist/index.js ADDED
@@ -0,0 +1,158 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.PuppeteerClient = exports.PuppeteerClientActionType = void 0;
7
+ const core_1 = require("@xcrap/core");
8
+ const puppeteer_1 = __importDefault(require("puppeteer"));
9
+ const constants_1 = require("./constants");
10
+ var PuppeteerClientActionType;
11
+ (function (PuppeteerClientActionType) {
12
+ PuppeteerClientActionType["BeforeRequest"] = "beforeRequest";
13
+ PuppeteerClientActionType["AfterRequest"] = "afterRequest";
14
+ })(PuppeteerClientActionType || (exports.PuppeteerClientActionType = PuppeteerClientActionType = {}));
15
+ class PuppeteerClient extends core_1.BaseClient {
16
+ constructor(options = {}) {
17
+ super(options);
18
+ this.options = options;
19
+ this.options = options;
20
+ this.browser = undefined;
21
+ }
22
+ async initBrowser() {
23
+ const puppeteerArguments = [];
24
+ if (this.proxy) {
25
+ puppeteerArguments.push(`--proxy-server=${this.currentProxy}`);
26
+ }
27
+ if (this.options.args && this.options.args.length > 0) {
28
+ puppeteerArguments.push(...this.options.args);
29
+ }
30
+ this.browser = await puppeteer_1.default.launch({
31
+ ...this.options,
32
+ args: puppeteerArguments,
33
+ headless: this.options.headless ? "shell" : false
34
+ });
35
+ }
36
+ async ensureBrowser() {
37
+ if (!this.browser) {
38
+ await this.initBrowser();
39
+ }
40
+ }
41
+ async closeBrowser() {
42
+ if (this.browser) {
43
+ await this.browser.close();
44
+ this.browser = undefined;
45
+ }
46
+ }
47
+ async configurePage(page, { javaScriptEnabled }) {
48
+ var _a;
49
+ if (this.currentUserAgent) {
50
+ await page.setUserAgent((_a = this.currentUserAgent) !== null && _a !== void 0 ? _a : core_1.defaultUserAgent);
51
+ }
52
+ if (javaScriptEnabled !== undefined) {
53
+ await page.setJavaScriptEnabled(javaScriptEnabled);
54
+ }
55
+ }
56
+ extractActions(actions) {
57
+ const actionsBeforeRequest = [];
58
+ const actionsAfterRequest = [];
59
+ if (!actions) {
60
+ actions = [];
61
+ }
62
+ for (const action of actions) {
63
+ const actionType = typeof action === "function" ? constants_1.defaultPuppeteerActionType : action.type;
64
+ const actionFunc = typeof action === "function" ? action : action.exec;
65
+ if (actionType === "beforeRequest") {
66
+ actionsBeforeRequest.push(actionFunc);
67
+ }
68
+ else {
69
+ actionsAfterRequest.push(actionFunc);
70
+ }
71
+ }
72
+ return {
73
+ before: actionsBeforeRequest,
74
+ after: actionsAfterRequest
75
+ };
76
+ }
77
+ async executeActions(page, actions) {
78
+ for (const action of actions) {
79
+ await action(page);
80
+ }
81
+ }
82
+ async fetch({ url, javaScriptEnabled, maxRetries = 0, actions, retries = 0, retryDelay, }) {
83
+ await this.ensureBrowser();
84
+ const failedAttempts = [];
85
+ const attemptRequest = async (currentRetry) => {
86
+ let page = undefined;
87
+ try {
88
+ const fullUrl = this.currentProxyUrl ? `${this.currentProxyUrl}${url}` : url;
89
+ const { before: actionsBeforeRequest, after: actionsAfterRequest } = this.extractActions(actions);
90
+ page = await this.browser.newPage();
91
+ await this.configurePage(page, { javaScriptEnabled: javaScriptEnabled });
92
+ await this.executeActions(page, actionsBeforeRequest);
93
+ const response = await page.goto(fullUrl);
94
+ await this.executeActions(page, actionsAfterRequest);
95
+ const content = await page.content();
96
+ await page.close();
97
+ const status = response === null || response === void 0 ? void 0 : response.status();
98
+ if (status === undefined || !this.isSuccess(status)) {
99
+ throw new core_1.InvalidStatusCodeError(status !== null && status !== void 0 ? status : 500);
100
+ }
101
+ return new core_1.HttpResponse({
102
+ body: content,
103
+ headers: (response === null || response === void 0 ? void 0 : response.headers()) || {},
104
+ status: (response === null || response === void 0 ? void 0 : response.status()) || 200,
105
+ statusText: (response === null || response === void 0 ? void 0 : response.statusText()) || "Ok",
106
+ attempts: currentRetry + 1,
107
+ failedAttempts: failedAttempts,
108
+ });
109
+ }
110
+ catch (error) {
111
+ const errorMessage = error instanceof Error ? error.message : "Unknown error";
112
+ failedAttempts.push({ error: errorMessage, timestamp: new Date() });
113
+ if (page) {
114
+ await page.close().catch(() => { });
115
+ }
116
+ if (currentRetry < maxRetries) {
117
+ if (retryDelay !== undefined && retryDelay > 0) {
118
+ await (0, core_1.delay)(retryDelay);
119
+ }
120
+ return await attemptRequest(currentRetry + 1);
121
+ }
122
+ return new core_1.HttpResponse({
123
+ body: errorMessage,
124
+ headers: {},
125
+ status: error.status || 500,
126
+ statusText: "Request Failed",
127
+ attempts: currentRetry + 1,
128
+ failedAttempts: failedAttempts,
129
+ });
130
+ }
131
+ };
132
+ return await attemptRequest(retries);
133
+ }
134
+ async fetchMany({ requests, concurrency, requestDelay }) {
135
+ const results = [];
136
+ const executing = [];
137
+ for (let i = 0; i < requests.length; i++) {
138
+ const promise = this.executeRequest({
139
+ request: requests[i],
140
+ index: i,
141
+ requestDelay: requestDelay,
142
+ results: results
143
+ }).then(() => undefined);
144
+ executing.push(promise);
145
+ if (this.shouldThrottle(executing, concurrency)) {
146
+ await this.handleConcurrency(executing);
147
+ }
148
+ }
149
+ await Promise.all(executing);
150
+ return results;
151
+ }
152
+ async close() {
153
+ if (this.browser) {
154
+ await this.closeBrowser();
155
+ }
156
+ }
157
+ }
158
+ exports.PuppeteerClient = PuppeteerClient;
package/package.json CHANGED
@@ -1,16 +1,17 @@
1
1
  {
2
2
  "name": "@xcrap/puppeteer-client",
3
- "version": "0.0.2",
3
+ "version": "0.0.4",
4
4
  "description": "Xcrap Puppeteer Client is a package of the Xcrap framework that implements an HTTP client using the Puppeteer library.",
5
5
  "main": "./dist/index.js",
6
+ "module": "./dist/index.js",
6
7
  "types": "./dist/index.d.ts",
7
8
  "files": [
8
9
  "dist/*",
9
10
  "!/**/__tests__"
10
11
  ],
11
12
  "scripts": {
12
- "test": "jest",
13
- "build": "rollup -c"
13
+ "build": "tsc",
14
+ "test": "jest"
14
15
  },
15
16
  "keywords": [
16
17
  "xcrap",
@@ -23,20 +24,15 @@
23
24
  "license": "MIT",
24
25
  "type": "commonjs",
25
26
  "devDependencies": {
26
- "@rollup/plugin-commonjs": "^28.0.3",
27
- "@rollup/plugin-node-resolve": "^16.0.1",
28
- "@rollup/plugin-typescript": "^12.1.2",
29
27
  "@types/node": "^22.13.17",
30
- "rollup": "^4.39.0",
31
28
  "ts-node": "^10.9.2",
32
- "tslib": "^2.8.1",
33
29
  "typescript": "^5.8.2"
34
30
  },
35
31
  "dependencies": {
36
32
  "puppeteer": "^24.5.0"
37
33
  },
38
34
  "peerDependencies": {
39
- "@xcrap/core": "^0.0.3",
40
- "@xcrap/parser": "^0.0.2"
35
+ "@xcrap/core": "*",
36
+ "@xcrap/parser": "*"
41
37
  }
42
38
  }
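[file header missing in capture — a deleted build of the constants module, probably package/dist/constants.cjs] DELETED
@@ -1,5 +0,0 @@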
1
- 'use strict';
2
-
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.defaultPuppeteerActionType = void 0;
5
- exports.defaultPuppeteerActionType = "afterRequest";
package/dist/index.cjs DELETED
@@ -1,169 +0,0 @@
1
- 'use strict';
2
-
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.PuppeteerClient = exports.PuppeteerClientActionType = void 0;
5
- const tslib_1 = require("tslib");
6
- const core_1 = require("@xcrap/core");
7
- const puppeteer_1 = tslib_1.__importDefault(require("puppeteer"));
8
- const constants_1 = require("./constants");
9
- var PuppeteerClientActionType;
10
- (function (PuppeteerClientActionType) {
11
- PuppeteerClientActionType["BeforeRequest"] = "beforeRequest";
12
- PuppeteerClientActionType["AfterRequest"] = "afterRequest";
13
- })(PuppeteerClientActionType || (exports.PuppeteerClientActionType = PuppeteerClientActionType = {}));
14
- class PuppeteerClient extends core_1.BaseClient {
15
- constructor(options = {}) {
16
- super(options);
17
- this.options = options;
18
- this.options = options;
19
- this.browser = undefined;
20
- }
21
- initBrowser() {
22
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
23
- const puppeteerArguments = [];
24
- if (this.proxy) {
25
- puppeteerArguments.push(`--proxy-server=${this.currentProxy}`);
26
- }
27
- if (this.options.args && this.options.args.length > 0) {
28
- puppeteerArguments.push(...this.options.args);
29
- }
30
- this.browser = yield puppeteer_1.default.launch(Object.assign(Object.assign({}, this.options), { args: puppeteerArguments, headless: this.options.headless ? "shell" : false }));
31
- });
32
- }
33
- ensureBrowser() {
34
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
35
- if (!this.browser) {
36
- yield this.initBrowser();
37
- }
38
- });
39
- }
40
- closeBrowser() {
41
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
42
- if (this.browser) {
43
- yield this.browser.close();
44
- this.browser = undefined;
45
- }
46
- });
47
- }
48
- configurePage(page_1, _a) {
49
- return tslib_1.__awaiter(this, arguments, void 0, function* (page, { javaScriptEnabled }) {
50
- var _b;
51
- if (this.currentUserAgent) {
52
- yield page.setUserAgent((_b = this.currentUserAgent) !== null && _b !== void 0 ? _b : core_1.defaultUserAgent);
53
- }
54
- if (javaScriptEnabled !== undefined) {
55
- yield page.setJavaScriptEnabled(javaScriptEnabled);
56
- }
57
- });
58
- }
59
- extractActions(actions) {
60
- const actionsBeforeRequest = [];
61
- const actionsAfterRequest = [];
62
- if (!actions) {
63
- actions = [];
64
- }
65
- for (const action of actions) {
66
- const actionType = typeof action === "function" ? constants_1.defaultPuppeteerActionType : action.type;
67
- const actionFunc = typeof action === "function" ? action : action.exec;
68
- if (actionType === "beforeRequest") {
69
- actionsBeforeRequest.push(actionFunc);
70
- }
71
- else {
72
- actionsAfterRequest.push(actionFunc);
73
- }
74
- }
75
- return {
76
- before: actionsBeforeRequest,
77
- after: actionsAfterRequest
78
- };
79
- }
80
- executeActions(page, actions) {
81
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
82
- for (const action of actions) {
83
- yield action(page);
84
- }
85
- });
86
- }
87
- fetch(_a) {
88
- return tslib_1.__awaiter(this, arguments, void 0, function* ({ url, javaScriptEnabled, maxRetries = 0, actions, retries = 0, retryDelay, }) {
89
- yield this.ensureBrowser();
90
- const failedAttempts = [];
91
- const attemptRequest = (currentRetry) => tslib_1.__awaiter(this, void 0, void 0, function* () {
92
- let page = undefined;
93
- try {
94
- const fullUrl = this.currentProxyUrl ? `${this.currentProxyUrl}${url}` : url;
95
- const { before: actionsBeforeRequest, after: actionsAfterRequest } = this.extractActions(actions);
96
- page = yield this.browser.newPage();
97
- yield this.configurePage(page, { javaScriptEnabled: javaScriptEnabled });
98
- yield this.executeActions(page, actionsBeforeRequest);
99
- const response = yield page.goto(fullUrl);
100
- yield this.executeActions(page, actionsAfterRequest);
101
- const content = yield page.content();
102
- yield page.close();
103
- const status = response === null || response === void 0 ? void 0 : response.status();
104
- if (status === undefined || !this.isSuccess(status)) {
105
- throw new core_1.InvalidStatusCodeError(status !== null && status !== void 0 ? status : 500);
106
- }
107
- return new core_1.HttpResponse({
108
- body: content,
109
- headers: (response === null || response === void 0 ? void 0 : response.headers()) || {},
110
- status: (response === null || response === void 0 ? void 0 : response.status()) || 200,
111
- statusText: (response === null || response === void 0 ? void 0 : response.statusText()) || "Ok",
112
- attempts: currentRetry + 1,
113
- failedAttempts: failedAttempts,
114
- });
115
- }
116
- catch (error) {
117
- const errorMessage = error instanceof Error ? error.message : "Unknown error";
118
- failedAttempts.push({ error: errorMessage, timestamp: new Date() });
119
- if (page) {
120
- yield page.close().catch(() => { });
121
- }
122
- if (currentRetry < maxRetries) {
123
- if (retryDelay !== undefined && retryDelay > 0) {
124
- yield (0, core_1.delay)(retryDelay);
125
- }
126
- return yield attemptRequest(currentRetry + 1);
127
- }
128
- return new core_1.HttpResponse({
129
- body: errorMessage,
130
- headers: {},
131
- status: error.status || 500,
132
- statusText: "Request Failed",
133
- attempts: currentRetry + 1,
134
- failedAttempts: failedAttempts,
135
- });
136
- }
137
- });
138
- return yield attemptRequest(retries);
139
- });
140
- }
141
- fetchMany(_a) {
142
- return tslib_1.__awaiter(this, arguments, void 0, function* ({ requests, concurrency, requestDelay }) {
143
- const results = [];
144
- const executing = [];
145
- for (let i = 0; i < requests.length; i++) {
146
- const promise = this.executeRequest({
147
- request: requests[i],
148
- index: i,
149
- requestDelay: requestDelay,
150
- results: results
151
- }).then(() => undefined);
152
- executing.push(promise);
153
- if (this.shouldThrottle(executing, concurrency)) {
154
- yield this.handleConcurrency(executing);
155
- }
156
- }
157
- yield Promise.all(executing);
158
- return results;
159
- });
160
- }
161
- close() {
162
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
163
- if (this.browser) {
164
- yield this.closeBrowser();
165
- }
166
- });
167
- }
168
- }
169
- exports.PuppeteerClient = PuppeteerClient;
package/dist/index.mjs DELETED
@@ -1,167 +0,0 @@
1
- Object.defineProperty(exports, "__esModule", { value: true });
2
- exports.PuppeteerClient = exports.PuppeteerClientActionType = void 0;
3
- const tslib_1 = require("tslib");
4
- const core_1 = require("@xcrap/core");
5
- const puppeteer_1 = tslib_1.__importDefault(require("puppeteer"));
6
- const constants_1 = require("./constants");
7
- var PuppeteerClientActionType;
8
- (function (PuppeteerClientActionType) {
9
- PuppeteerClientActionType["BeforeRequest"] = "beforeRequest";
10
- PuppeteerClientActionType["AfterRequest"] = "afterRequest";
11
- })(PuppeteerClientActionType || (exports.PuppeteerClientActionType = PuppeteerClientActionType = {}));
12
- class PuppeteerClient extends core_1.BaseClient {
13
- constructor(options = {}) {
14
- super(options);
15
- this.options = options;
16
- this.options = options;
17
- this.browser = undefined;
18
- }
19
- initBrowser() {
20
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
21
- const puppeteerArguments = [];
22
- if (this.proxy) {
23
- puppeteerArguments.push(`--proxy-server=${this.currentProxy}`);
24
- }
25
- if (this.options.args && this.options.args.length > 0) {
26
- puppeteerArguments.push(...this.options.args);
27
- }
28
- this.browser = yield puppeteer_1.default.launch(Object.assign(Object.assign({}, this.options), { args: puppeteerArguments, headless: this.options.headless ? "shell" : false }));
29
- });
30
- }
31
- ensureBrowser() {
32
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
33
- if (!this.browser) {
34
- yield this.initBrowser();
35
- }
36
- });
37
- }
38
- closeBrowser() {
39
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
40
- if (this.browser) {
41
- yield this.browser.close();
42
- this.browser = undefined;
43
- }
44
- });
45
- }
46
- configurePage(page_1, _a) {
47
- return tslib_1.__awaiter(this, arguments, void 0, function* (page, { javaScriptEnabled }) {
48
- var _b;
49
- if (this.currentUserAgent) {
50
- yield page.setUserAgent((_b = this.currentUserAgent) !== null && _b !== void 0 ? _b : core_1.defaultUserAgent);
51
- }
52
- if (javaScriptEnabled !== undefined) {
53
- yield page.setJavaScriptEnabled(javaScriptEnabled);
54
- }
55
- });
56
- }
57
- extractActions(actions) {
58
- const actionsBeforeRequest = [];
59
- const actionsAfterRequest = [];
60
- if (!actions) {
61
- actions = [];
62
- }
63
- for (const action of actions) {
64
- const actionType = typeof action === "function" ? constants_1.defaultPuppeteerActionType : action.type;
65
- const actionFunc = typeof action === "function" ? action : action.exec;
66
- if (actionType === "beforeRequest") {
67
- actionsBeforeRequest.push(actionFunc);
68
- }
69
- else {
70
- actionsAfterRequest.push(actionFunc);
71
- }
72
- }
73
- return {
74
- before: actionsBeforeRequest,
75
- after: actionsAfterRequest
76
- };
77
- }
78
- executeActions(page, actions) {
79
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
80
- for (const action of actions) {
81
- yield action(page);
82
- }
83
- });
84
- }
85
- fetch(_a) {
86
- return tslib_1.__awaiter(this, arguments, void 0, function* ({ url, javaScriptEnabled, maxRetries = 0, actions, retries = 0, retryDelay, }) {
87
- yield this.ensureBrowser();
88
- const failedAttempts = [];
89
- const attemptRequest = (currentRetry) => tslib_1.__awaiter(this, void 0, void 0, function* () {
90
- let page = undefined;
91
- try {
92
- const fullUrl = this.currentProxyUrl ? `${this.currentProxyUrl}${url}` : url;
93
- const { before: actionsBeforeRequest, after: actionsAfterRequest } = this.extractActions(actions);
94
- page = yield this.browser.newPage();
95
- yield this.configurePage(page, { javaScriptEnabled: javaScriptEnabled });
96
- yield this.executeActions(page, actionsBeforeRequest);
97
- const response = yield page.goto(fullUrl);
98
- yield this.executeActions(page, actionsAfterRequest);
99
- const content = yield page.content();
100
- yield page.close();
101
- const status = response === null || response === void 0 ? void 0 : response.status();
102
- if (status === undefined || !this.isSuccess(status)) {
103
- throw new core_1.InvalidStatusCodeError(status !== null && status !== void 0 ? status : 500);
104
- }
105
- return new core_1.HttpResponse({
106
- body: content,
107
- headers: (response === null || response === void 0 ? void 0 : response.headers()) || {},
108
- status: (response === null || response === void 0 ? void 0 : response.status()) || 200,
109
- statusText: (response === null || response === void 0 ? void 0 : response.statusText()) || "Ok",
110
- attempts: currentRetry + 1,
111
- failedAttempts: failedAttempts,
112
- });
113
- }
114
- catch (error) {
115
- const errorMessage = error instanceof Error ? error.message : "Unknown error";
116
- failedAttempts.push({ error: errorMessage, timestamp: new Date() });
117
- if (page) {
118
- yield page.close().catch(() => { });
119
- }
120
- if (currentRetry < maxRetries) {
121
- if (retryDelay !== undefined && retryDelay > 0) {
122
- yield (0, core_1.delay)(retryDelay);
123
- }
124
- return yield attemptRequest(currentRetry + 1);
125
- }
126
- return new core_1.HttpResponse({
127
- body: errorMessage,
128
- headers: {},
129
- status: error.status || 500,
130
- statusText: "Request Failed",
131
- attempts: currentRetry + 1,
132
- failedAttempts: failedAttempts,
133
- });
134
- }
135
- });
136
- return yield attemptRequest(retries);
137
- });
138
- }
139
- fetchMany(_a) {
140
- return tslib_1.__awaiter(this, arguments, void 0, function* ({ requests, concurrency, requestDelay }) {
141
- const results = [];
142
- const executing = [];
143
- for (let i = 0; i < requests.length; i++) {
144
- const promise = this.executeRequest({
145
- request: requests[i],
146
- index: i,
147
- requestDelay: requestDelay,
148
- results: results
149
- }).then(() => undefined);
150
- executing.push(promise);
151
- if (this.shouldThrottle(executing, concurrency)) {
152
- yield this.handleConcurrency(executing);
153
- }
154
- }
155
- yield Promise.all(executing);
156
- return results;
157
- });
158
- }
159
- close() {
160
- return tslib_1.__awaiter(this, void 0, void 0, function* () {
161
- if (this.browser) {
162
- yield this.closeBrowser();
163
- }
164
- });
165
- }
166
- }
167
- exports.PuppeteerClient = PuppeteerClient;