@rafikidota/scoutee 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/modules/cheerio/cheerio.module.d.ts +2 -0
- package/dist/modules/cheerio/cheerio.module.js +26 -0
- package/dist/modules/cheerio/cheerio.module.js.map +1 -0
- package/dist/modules/cheerio/cheerio.service.d.ts +16 -0
- package/dist/modules/cheerio/cheerio.service.js +61 -0
- package/dist/modules/cheerio/cheerio.service.js.map +1 -0
- package/dist/modules/cheerio/constant/cheerio.env.constant.d.ts +6 -0
- package/dist/modules/cheerio/constant/cheerio.env.constant.js +10 -0
- package/dist/modules/cheerio/constant/cheerio.env.constant.js.map +1 -0
- package/dist/modules/cheerio/env/cheerio.env.config.d.ts +8 -0
- package/dist/modules/cheerio/env/cheerio.env.config.js +48 -0
- package/dist/modules/cheerio/env/cheerio.env.config.js.map +1 -0
- package/dist/modules/cheerio/env/cheerio.env.validation.d.ts +2 -0
- package/dist/modules/cheerio/env/cheerio.env.validation.js +46 -0
- package/dist/modules/cheerio/env/cheerio.env.validation.js.map +1 -0
- package/dist/modules/cheerio/services/cheerio.hook.service.d.ts +7 -0
- package/dist/modules/cheerio/services/cheerio.hook.service.js +69 -0
- package/dist/modules/cheerio/services/cheerio.hook.service.js.map +1 -0
- package/dist/modules/cheerio/services/cheerio.logger.service.d.ts +11 -0
- package/dist/modules/cheerio/services/cheerio.logger.service.js +44 -0
- package/dist/modules/cheerio/services/cheerio.logger.service.js.map +1 -0
- package/dist/modules/playwright/constant/playwright.env.constant.d.ts +6 -0
- package/dist/modules/playwright/constant/playwright.env.constant.js +10 -0
- package/dist/modules/playwright/constant/playwright.env.constant.js.map +1 -0
- package/dist/modules/playwright/env/playwright.env.config.d.ts +8 -0
- package/dist/modules/playwright/env/playwright.env.config.js +48 -0
- package/dist/modules/playwright/env/playwright.env.config.js.map +1 -0
- package/dist/modules/playwright/env/playwright.env.validation.d.ts +2 -0
- package/dist/modules/playwright/env/playwright.env.validation.js +46 -0
- package/dist/modules/playwright/env/playwright.env.validation.js.map +1 -0
- package/dist/modules/playwright/playwright.module.d.ts +2 -0
- package/dist/modules/playwright/playwright.module.js +30 -0
- package/dist/modules/playwright/playwright.module.js.map +1 -0
- package/dist/modules/playwright/playwright.service.d.ts +18 -0
- package/dist/modules/playwright/playwright.service.js +84 -0
- package/dist/modules/playwright/playwright.service.js.map +1 -0
- package/dist/modules/playwright/services/playwright.hook.service.d.ts +8 -0
- package/dist/modules/playwright/services/playwright.hook.service.js +72 -0
- package/dist/modules/playwright/services/playwright.hook.service.js.map +1 -0
- package/dist/modules/playwright/services/playwright.logger.service.d.ts +11 -0
- package/dist/modules/playwright/services/playwright.logger.service.js +44 -0
- package/dist/modules/playwright/services/playwright.logger.service.js.map +1 -0
- package/package.json +57 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 rafiki
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted helper: exposes property `k` of module object `m` on `o`
 * under the name `k2` (which defaults to `k`).
 *
 * When the source property is a plain writable/configurable value, or an
 * accessor on a non-ES module, it is re-exposed through an enumerable getter so
 * later changes on `m` stay visible; otherwise the original descriptor is reused.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var reusable = descriptor && ("get" in descriptor
            ? m.__esModule
            : !(descriptor.writable || descriptor.configurable));
        if (!reusable) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    }
    : function (o, m, k, k2) {
        // Fallback for engines without Object.create: plain copy of the value.
        o[k2 === undefined ? k : k2] = m[k];
    });
|
|
13
|
+
/**
 * TypeScript-emitted helper behind `export * from "..."`: binds every
 * enumerable property of `m` onto `exports`, skipping "default" and any name
 * that `exports` already owns.
 */
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    var owns = Object.prototype.hasOwnProperty;
    for (var name in m) {
        if (name === "default") continue;
        if (owns.call(exports, name)) continue;
        __createBinding(exports, m, name);
    }
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
__exportStar(require("./modules/cheerio/cheerio.module"), exports);
|
|
18
|
+
__exportStar(require("./modules/cheerio/cheerio.service"), exports);
|
|
19
|
+
__exportStar(require("./modules/playwright/playwright.module"), exports);
|
|
20
|
+
__exportStar(require("./modules/playwright/playwright.service"), exports);
|
|
21
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,mEAAiD;AACjD,oEAAkD;AAElD,yEAAuD;AACvD,0EAAwD"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted helper that applies a list of decorators.
 *
 * Called with two arguments it decorates a class (`target`); with a `key` it
 * decorates a member, and with a fourth argument the resulting descriptor is
 * written back onto `target`. Decorators run from last to first. When
 * `Reflect.decorate` exists it is used instead of the manual loop.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    } else {
        // A literal `null` descriptor means "look it up on the target".
        if (desc === null) {
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    } else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (!decorator) continue;
            var produced;
            if (argCount < 3) produced = decorator(result);
            else if (argCount > 3) produced = decorator(target, key, result);
            else produced = decorator(target, key);
            // A decorator returning nothing leaves the current result in place.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.CheerioModule = void 0;
|
|
10
|
+
const common_1 = require("@nestjs/common");
|
|
11
|
+
const config_1 = require("@nestjs/config");
|
|
12
|
+
const cheerio_service_1 = require("./cheerio.service");
|
|
13
|
+
const cheerio_env_config_1 = require("./env/cheerio.env.config");
|
|
14
|
+
const cheerio_logger_service_1 = require("./services/cheerio.logger.service");
|
|
15
|
+
const cheerio_hook_service_1 = require("./services/cheerio.hook.service");
|
|
16
|
+
let CheerioModule = class CheerioModule {
|
|
17
|
+
};
|
|
18
|
+
exports.CheerioModule = CheerioModule;
|
|
19
|
+
exports.CheerioModule = CheerioModule = __decorate([
|
|
20
|
+
(0, common_1.Module)({
|
|
21
|
+
imports: [config_1.ConfigModule.forFeature(cheerio_env_config_1.CheerioEnvConfig)],
|
|
22
|
+
providers: [cheerio_service_1.CheerioService, cheerio_logger_service_1.CheerioLoggerService, cheerio_hook_service_1.CheerioHookService],
|
|
23
|
+
exports: [cheerio_service_1.CheerioService],
|
|
24
|
+
})
|
|
25
|
+
], CheerioModule);
|
|
26
|
+
//# sourceMappingURL=cheerio.module.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cheerio.module.js","sourceRoot":"","sources":["../../../src/modules/cheerio/cheerio.module.ts"],"names":[],"mappings":";;;;;;;;;AAAA,2CAAwC;AACxC,2CAA8C;AAE9C,uDAAmD;AACnD,iEAA4D;AAC5D,8EAAyE;AACzE,0EAAqE;AAO9D,IAAM,aAAa,GAAnB,MAAM,aAAa;CAAG,CAAA;AAAhB,sCAAa;wBAAb,aAAa;IALzB,IAAA,eAAM,EAAC;QACN,OAAO,EAAE,CAAC,qBAAY,CAAC,UAAU,CAAC,qCAAgB,CAAC,CAAC;QACpD,SAAS,EAAE,CAAC,gCAAc,EAAE,6CAAoB,EAAE,yCAAkB,CAAC;QACrE,OAAO,EAAE,CAAC,gCAAc,CAAC;KAC1B,CAAC;GACW,aAAa,CAAG"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { ConfigService } from '@nestjs/config';
|
|
2
|
+
import { CheerioCrawler, CheerioCrawlerOptions } from 'crawlee';
|
|
3
|
+
import { CheerioLoggerService } from './services/cheerio.logger.service';
|
|
4
|
+
import { CheerioHookService } from './services/cheerio.hook.service';
|
|
5
|
+
/**
 * Injectable factory for Crawlee `CheerioCrawler` instances.
 * Reads its default limits from the config service at construction time.
 */
export declare class CheerioService {
|
|
6
|
+
private readonly config;
|
|
7
|
+
private readonly logger;
|
|
8
|
+
private readonly hook;
|
|
9
|
+
private readonly maxRequests;
|
|
10
|
+
private readonly timeoutSecs;
|
|
11
|
+
private readonly maxConcurrency;
|
|
12
|
+
private readonly minConcurrency;
|
|
13
|
+
private readonly maxRequestRetries;
|
|
14
|
+
constructor(config: ConfigService, logger: CheerioLoggerService, hook: CheerioHookService);
|
|
15
|
+
/**
 * Creates a crawler from the configured defaults overlaid with `options`.
 * The service's default navigation hooks are placed before any hooks given in `options`.
 */
create(options: CheerioCrawlerOptions): CheerioCrawler;
|
|
16
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted helper that applies a list of decorators.
 *
 * Called with two arguments it decorates a class (`target`); with a `key` it
 * decorates a member, and with a fourth argument the resulting descriptor is
 * written back onto `target`. Decorators run from last to first. When
 * `Reflect.decorate` exists it is used instead of the manual loop.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    } else {
        // A literal `null` descriptor means "look it up on the target".
        if (desc === null) {
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    } else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (!decorator) continue;
            var produced;
            if (argCount < 3) produced = decorator(result);
            else if (argCount > 3) produced = decorator(target, key, result);
            else produced = decorator(target, key);
            // A decorator returning nothing leaves the current result in place.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
/**
 * TypeScript-emitted helper: builds a metadata decorator through
 * `Reflect.metadata` when that function is available, and returns `undefined`
 * otherwise.
 */
var __metadata = (this && this.__metadata) || function (k, v) {
    var supported = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    return supported ? Reflect.metadata(k, v) : undefined;
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.CheerioService = void 0;
|
|
13
|
+
const common_1 = require("@nestjs/common");
|
|
14
|
+
const config_1 = require("@nestjs/config");
|
|
15
|
+
const crawlee_1 = require("crawlee");
|
|
16
|
+
const cheerio_logger_service_1 = require("./services/cheerio.logger.service");
|
|
17
|
+
const cheerio_hook_service_1 = require("./services/cheerio.hook.service");
|
|
18
|
+
const cheerio_env_constant_1 = require("./constant/cheerio.env.constant");
|
|
19
|
+
const cheerio_env_constant_2 = require("./constant/cheerio.env.constant");
|
|
20
|
+
const cheerio_env_constant_3 = require("./constant/cheerio.env.constant");
|
|
21
|
+
const cheerio_env_constant_4 = require("./constant/cheerio.env.constant");
|
|
22
|
+
const cheerio_env_constant_5 = require("./constant/cheerio.env.constant");
|
|
23
|
+
/**
 * Factory service that builds Crawlee `CheerioCrawler` instances pre-configured
 * with the limits read from the config service and the default navigation hooks.
 */
let CheerioService = class CheerioService {
    /**
     * @param config Config service queried for the concurrency, retry, timeout
     *   and request-limit settings.
     * @param logger Logger used to announce crawler creation.
     * @param hook Provider of the default pre/post navigation hooks.
     */
    constructor(config, logger, hook) {
        this.config = config;
        this.logger = logger;
        this.hook = hook;
        this.maxConcurrency = this.config.get(cheerio_env_constant_1.MAX_CONCURRENCY);
        this.minConcurrency = this.config.get(cheerio_env_constant_2.MIN_CONCURRENCY);
        this.maxRequestRetries = this.config.get(cheerio_env_constant_3.MAX_REQUEST_RETRIES);
        this.timeoutSecs = this.config.get(cheerio_env_constant_4.TIMEOUT_SECS);
        this.maxRequests = this.config.get(cheerio_env_constant_5.MAX_REQUESTS);
    }
    /**
     * Creates a crawler from the configured defaults overlaid with `options`.
     *
     * @param options Crawler options; any key given here wins over the
     *   configured default. Optional, defaults to an empty object.
     * @returns A new `CheerioCrawler` whose navigation hooks are the service
     *   defaults followed by the hooks supplied in `options`.
     */
    create(options = {}) {
        const { preNavigationHooks = [], postNavigationHooks = [] } = options;
        const defaultPreNavHooks = this.hook.getPreNavigationHooks();
        const defaultPostNavHooks = this.hook.getPostNavigationHooks();
        const configured = {
            maxRequestsPerCrawl: this.maxRequests,
            requestHandlerTimeoutSecs: this.timeoutSecs,
            maxConcurrency: this.maxConcurrency,
            minConcurrency: this.minConcurrency,
            maxRequestRetries: this.maxRequestRetries,
        };
        // A setting that was never configured shows up as `undefined` or `NaN`.
        // Neither is a usable limit, so leave the key out and let the crawler
        // apply its own default instead of receiving an invalid explicit value.
        const defaultOptions = {};
        for (const [key, value] of Object.entries(configured)) {
            if (value !== undefined && !Number.isNaN(value)) {
                defaultOptions[key] = value;
            }
        }
        this.logger.log('Creating Cheerio Crawler instance');
        return new crawlee_1.CheerioCrawler({
            ...defaultOptions,
            ...options,
            preNavigationHooks: [...defaultPreNavHooks, ...preNavigationHooks],
            postNavigationHooks: [...defaultPostNavHooks, ...postNavigationHooks],
        });
    }
};
|
|
54
|
+
exports.CheerioService = CheerioService;
|
|
55
|
+
exports.CheerioService = CheerioService = __decorate([
|
|
56
|
+
(0, common_1.Injectable)(),
|
|
57
|
+
__metadata("design:paramtypes", [config_1.ConfigService,
|
|
58
|
+
cheerio_logger_service_1.CheerioLoggerService,
|
|
59
|
+
cheerio_hook_service_1.CheerioHookService])
|
|
60
|
+
], CheerioService);
|
|
61
|
+
//# sourceMappingURL=cheerio.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cheerio.service.js","sourceRoot":"","sources":["../../../src/modules/cheerio/cheerio.service.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,2CAA4C;AAC5C,2CAA+C;AAE/C,qCAAgE;AAChE,8EAAyE;AACzE,0EAAqE;AACrE,0EAAkE;AAClE,0EAAkE;AAClE,0EAAsE;AACtE,0EAA+D;AAC/D,0EAA+D;AAGxD,IAAM,cAAc,GAApB,MAAM,cAAc;IAOzB,YACmB,MAAqB,EACrB,MAA4B,EAC5B,IAAwB;QAFxB,WAAM,GAAN,MAAM,CAAe;QACrB,WAAM,GAAN,MAAM,CAAsB;QAC5B,SAAI,GAAJ,IAAI,CAAoB;QAEzC,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,sCAAe,CAAE,CAAC;QAChE,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,sCAAe,CAAE,CAAC;QAChE,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,0CAAmB,CAAE,CAAC;QACvE,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,mCAAY,CAAE,CAAC;QAC1D,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,mCAAY,CAAE,CAAC;IAC5D,CAAC;IAED,MAAM,CAAC,OAA8B;QACnC,MAAM,EAAE,kBAAkB,GAAG,EAAE,EAAE,mBAAmB,GAAG,EAAE,EAAE,GAAG,OAAO,CAAC;QACtE,MAAM,kBAAkB,GAAG,IAAI,CAAC,IAAI,CAAC,qBAAqB,EAAE,CAAC;QAC7D,MAAM,mBAAmB,GAAG,IAAI,CAAC,IAAI,CAAC,sBAAsB,EAAE,CAAC;QAC/D,MAAM,cAAc,GAA0B;YAC5C,mBAAmB,EAAE,IAAI,CAAC,WAAW;YACrC,yBAAyB,EAAE,IAAI,CAAC,WAAW;YAC3C,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,iBAAiB,EAAE,IAAI,CAAC,iBAAiB;SAC1C,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;QACrD,OAAO,IAAI,wBAAc,CAAC;YACxB,GAAG,cAAc;YACjB,GAAG,OAAO;YACV,kBAAkB,EAAE,CAAC,GAAG,kBAAkB,EAAE,GAAG,kBAAkB,CAAC;YAClE,mBAAmB,EAAE,CAAC,GAAG,mBAAmB,EAAE,GAAG,mBAAmB,CAAC;SACtE,CAAC,CAAC;IACL,CAAC;CACF,CAAA;AAtCY,wCAAc;yBAAd,cAAc;IAD1B,IAAA,mBAAU,GAAE;qCASgB,sBAAa;QACb,6CAAoB;QACtB,yCAAkB;GAVhC,cAAc,CAsC1B"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
 * Names of the Cheerio crawler settings. Each value is both the environment
 * variable name and the key under which the setting is exposed by the config factory.
 */
export declare const MIN_CONCURRENCY = "CRAWLEE_CHEERIO_MIN_CONCURRENCY";
|
|
2
|
+
export declare const MAX_CONCURRENCY = "CRAWLEE_CHEERIO_MAX_CONCURRENCY";
|
|
3
|
+
export declare const MAX_REQUEST_RETRIES = "CRAWLEE_CHEERIO_MAX_REQUEST_RETRIES";
|
|
4
|
+
export declare const TIMEOUT_SECS = "CRAWLEE_CHEERIO_TIMEOUT_SECS";
|
|
5
|
+
export declare const MAX_REQUESTS = "CRAWLEE_CHEERIO_MAX_REQUESTS";
|
|
6
|
+
export declare const INITIAL_PAGE = "CRAWLEE_CHEERIO_INITIAL_PAGE";
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
// Names of the Cheerio crawler settings. Each string is the environment
// variable read by the config factory and the key CheerioService passes to
// config.get(). The first statement is compiler output that pre-declares every
// export as undefined before the real values are assigned.
exports.INITIAL_PAGE = exports.MAX_REQUESTS = exports.TIMEOUT_SECS = exports.MAX_REQUEST_RETRIES = exports.MAX_CONCURRENCY = exports.MIN_CONCURRENCY = void 0;
|
|
4
|
+
exports.MIN_CONCURRENCY = 'CRAWLEE_CHEERIO_MIN_CONCURRENCY';
|
|
5
|
+
exports.MAX_CONCURRENCY = 'CRAWLEE_CHEERIO_MAX_CONCURRENCY';
|
|
6
|
+
exports.MAX_REQUEST_RETRIES = 'CRAWLEE_CHEERIO_MAX_REQUEST_RETRIES';
|
|
7
|
+
exports.TIMEOUT_SECS = 'CRAWLEE_CHEERIO_TIMEOUT_SECS';
|
|
8
|
+
exports.MAX_REQUESTS = 'CRAWLEE_CHEERIO_MAX_REQUESTS';
|
|
9
|
+
// NOTE(review): INITIAL_PAGE is exported but not read by CheerioService in the visible code.
exports.INITIAL_PAGE = 'CRAWLEE_CHEERIO_INITIAL_PAGE';
|
|
10
|
+
//# sourceMappingURL=cheerio.env.constant.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cheerio.env.constant.js","sourceRoot":"","sources":["../../../../src/modules/cheerio/constant/cheerio.env.constant.ts"],"names":[],"mappings":";;;AAAa,QAAA,eAAe,GAAG,iCAAiC,CAAC;AACpD,QAAA,eAAe,GAAG,iCAAiC,CAAC;AACpD,QAAA,mBAAmB,GAAG,qCAAqC,CAAC;AAC5D,QAAA,YAAY,GAAG,8BAA8B,CAAC;AAC9C,QAAA,YAAY,GAAG,8BAA8B,CAAC;AAC9C,QAAA,YAAY,GAAG,8BAA8B,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
 * Configuration factory for the Cheerio crawler settings.
 * Returns one numeric entry per `CRAWLEE_CHEERIO_*` environment variable.
 */
export declare const CheerioEnvConfig: () => {
|
|
2
|
+
CRAWLEE_CHEERIO_MAX_CONCURRENCY: number;
|
|
3
|
+
CRAWLEE_CHEERIO_MIN_CONCURRENCY: number;
|
|
4
|
+
CRAWLEE_CHEERIO_MAX_REQUEST_RETRIES: number;
|
|
5
|
+
CRAWLEE_CHEERIO_TIMEOUT_SECS: number;
|
|
6
|
+
CRAWLEE_CHEERIO_MAX_REQUESTS: number;
|
|
7
|
+
CRAWLEE_CHEERIO_INITIAL_PAGE: number;
|
|
8
|
+
};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted helper: exposes property `k` of module object `m` on `o`
 * under the name `k2` (which defaults to `k`).
 *
 * When the source property is a plain writable/configurable value, or an
 * accessor on a non-ES module, it is re-exposed through an enumerable getter so
 * later changes on `m` stay visible; otherwise the original descriptor is reused.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var reusable = descriptor && ("get" in descriptor
            ? m.__esModule
            : !(descriptor.writable || descriptor.configurable));
        if (!reusable) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    }
    : function (o, m, k, k2) {
        // Fallback for engines without Object.create: plain copy of the value.
        o[k2 === undefined ? k : k2] = m[k];
    });
|
|
13
|
+
/**
 * TypeScript-emitted helper: stores `v` as the `default` member of the
 * namespace object `o` (as an enumerable, read-only property when
 * Object.create is available).
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (o, v) {
        var descriptor = { enumerable: true, value: v };
        Object.defineProperty(o, "default", descriptor);
    }
    : function (o, v) {
        o.default = v;
    });
|
|
18
|
+
/**
 * TypeScript-emitted helper behind `import * as ns from "..."`.
 * ES modules (objects flagged with `__esModule`) are returned untouched; any
 * other value is wrapped in a fresh namespace object that binds each own
 * property except "default" and stores the original value as `default`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first use with Object.getOwnPropertyNames, or with a
    // for-in based equivalent when that function is unavailable.
    var listOwnKeys = function (target) {
        listOwnKeys = Object.getOwnPropertyNames || function (source) {
            var names = [];
            for (var name in source) {
                if (Object.prototype.hasOwnProperty.call(source, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(target);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] === "default") continue;
                __createBinding(namespace, mod, keys[i]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.CheerioEnvConfig = void 0;
|
|
37
|
+
const dotenv = __importStar(require("dotenv"));
|
|
38
|
+
dotenv.config();
|
|
39
|
+
/**
 * Configuration factory for the Cheerio crawler settings.
 *
 * Reads each `CRAWLEE_CHEERIO_*` environment variable and converts it to a
 * number. A variable that is unset or blank falls back to the default the
 * package's Joi validation schema declares for it, so the factory never
 * yields `NaN` just because a variable is missing.
 *
 * @returns {Record<string, number>} One numeric entry per setting, keyed by
 *   the environment variable name.
 */
const CheerioEnvConfig = () => {
    const readNumber = (name, fallback) => {
        const raw = process.env[name];
        // Number(undefined) is NaN and Number('') is 0; neither reflects intent.
        if (raw === undefined || raw.trim() === '') {
            return fallback;
        }
        return Number(raw);
    };
    return {
        CRAWLEE_CHEERIO_MAX_CONCURRENCY: readNumber('CRAWLEE_CHEERIO_MAX_CONCURRENCY', 5),
        CRAWLEE_CHEERIO_MIN_CONCURRENCY: readNumber('CRAWLEE_CHEERIO_MIN_CONCURRENCY', 1),
        CRAWLEE_CHEERIO_MAX_REQUEST_RETRIES: readNumber('CRAWLEE_CHEERIO_MAX_REQUEST_RETRIES', 10),
        CRAWLEE_CHEERIO_TIMEOUT_SECS: readNumber('CRAWLEE_CHEERIO_TIMEOUT_SECS', 180),
        CRAWLEE_CHEERIO_MAX_REQUESTS: readNumber('CRAWLEE_CHEERIO_MAX_REQUESTS', 100000),
        CRAWLEE_CHEERIO_INITIAL_PAGE: readNumber('CRAWLEE_CHEERIO_INITIAL_PAGE', 1),
    };
};
|
|
47
|
+
exports.CheerioEnvConfig = CheerioEnvConfig;
|
|
48
|
+
//# sourceMappingURL=cheerio.env.config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cheerio.env.config.js","sourceRoot":"","sources":["../../../../src/modules/cheerio/env/cheerio.env.config.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AAEjC,MAAM,CAAC,MAAM,EAAE,CAAC;AACT,MAAM,gBAAgB,GAAG,GAAG,EAAE,CAAC,CAAC;IACrC,+BAA+B,EAAE,MAAM,CACrC,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAC5C;IACD,+BAA+B,EAAE,MAAM,CACrC,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAC5C;IACD,mCAAmC,EAAE,MAAM,CACzC,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAChD;IACD,4BAA4B,EAAE,MAAM,CAClC,OAAO,CAAC,GAAG,CAAC,4BAA4B,CACzC;IACD,4BAA4B,EAAE,MAAM,CAClC,OAAO,CAAC,GAAG,CAAC,4BAA4B,CACzC;IACD,4BAA4B,EAAE,MAAM,CAClC,OAAO,CAAC,GAAG,CAAC,4BAA4B,CACzC;CACF,CAAC,CAAC;AAnBU,QAAA,gBAAgB,oBAmB1B"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted helper: exposes property `k` of module object `m` on `o`
 * under the name `k2` (which defaults to `k`).
 *
 * When the source property is a plain writable/configurable value, or an
 * accessor on a non-ES module, it is re-exposed through an enumerable getter so
 * later changes on `m` stay visible; otherwise the original descriptor is reused.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var reusable = descriptor && ("get" in descriptor
            ? m.__esModule
            : !(descriptor.writable || descriptor.configurable));
        if (!reusable) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    }
    : function (o, m, k, k2) {
        // Fallback for engines without Object.create: plain copy of the value.
        o[k2 === undefined ? k : k2] = m[k];
    });
|
|
13
|
+
/**
 * TypeScript-emitted helper: stores `v` as the `default` member of the
 * namespace object `o` (as an enumerable, read-only property when
 * Object.create is available).
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (o, v) {
        var descriptor = { enumerable: true, value: v };
        Object.defineProperty(o, "default", descriptor);
    }
    : function (o, v) {
        o.default = v;
    });
|
|
18
|
+
/**
 * TypeScript-emitted helper behind `import * as ns from "..."`.
 * ES modules (objects flagged with `__esModule`) are returned untouched; any
 * other value is wrapped in a fresh namespace object that binds each own
 * property except "default" and stores the original value as `default`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first use with Object.getOwnPropertyNames, or with a
    // for-in based equivalent when that function is unavailable.
    var listOwnKeys = function (target) {
        listOwnKeys = Object.getOwnPropertyNames || function (source) {
            var names = [];
            for (var name in source) {
                if (Object.prototype.hasOwnProperty.call(source, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(target);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] === "default") continue;
                __createBinding(namespace, mod, keys[i]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.CheerioConfigValidationSchema = void 0;
|
|
37
|
+
const joi = __importStar(require("joi"));
|
|
38
|
+
exports.CheerioConfigValidationSchema = joi.object({
|
|
39
|
+
CRAWLEE_CHEERIO_MIN_CONCURRENCY: joi.number().optional().default(1),
|
|
40
|
+
CRAWLEE_CHEERIO_MAX_CONCURRENCY: joi.number().optional().default(5),
|
|
41
|
+
CRAWLEE_CHEERIO_MAX_REQUEST_RETRIES: joi.number().optional().default(10),
|
|
42
|
+
CRAWLEE_CHEERIO_TIMEOUT_SECS: joi.number().optional().default(180),
|
|
43
|
+
CRAWLEE_CHEERIO_MAX_REQUESTS: joi.number().optional().default(100000),
|
|
44
|
+
CRAWLEE_CHEERIO_INITIAL_PAGE: joi.number().optional().default(1),
|
|
45
|
+
});
|
|
46
|
+
//# sourceMappingURL=cheerio.env.validation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cheerio.env.validation.js","sourceRoot":"","sources":["../../../../src/modules/cheerio/env/cheerio.env.validation.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,yCAA2B;AAEd,QAAA,6BAA6B,GAAG,GAAG,CAAC,MAAM,CAAC;IACtD,+BAA+B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACnE,+BAA+B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACnE,mCAAmC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;IACxE,4BAA4B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC;IAClE,4BAA4B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC;IACrE,4BAA4B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;CACjE,CAAC,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { CheerioLoggerService } from './cheerio.logger.service';
|
|
2
|
+
/** Injectable provider of the default navigation hooks, which log each request and its response status. */
export declare class CheerioHookService {
|
|
3
|
+
private readonly logger;
|
|
4
|
+
constructor(logger: CheerioLoggerService);
|
|
5
|
+
/** Returns the hooks that log the URL of a request before navigation. */
getPreNavigationHooks(): (({ request }: any) => void)[];
|
|
6
|
+
/** Returns the hooks that log the response status code and URL after navigation. */
getPostNavigationHooks(): (({ request, response }: any) => void)[];
|
|
7
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted helper that applies a list of decorators.
 *
 * Called with two arguments it decorates a class (`target`); with a `key` it
 * decorates a member, and with a fourth argument the resulting descriptor is
 * written back onto `target`. Decorators run from last to first. When
 * `Reflect.decorate` exists it is used instead of the manual loop.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    } else {
        // A literal `null` descriptor means "look it up on the target".
        if (desc === null) {
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    } else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (!decorator) continue;
            var produced;
            if (argCount < 3) produced = decorator(result);
            else if (argCount > 3) produced = decorator(target, key, result);
            else produced = decorator(target, key);
            // A decorator returning nothing leaves the current result in place.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
/**
 * TypeScript-emitted helper: builds a metadata decorator through
 * `Reflect.metadata` when that function is available, and returns `undefined`
 * otherwise.
 */
var __metadata = (this && this.__metadata) || function (k, v) {
    var supported = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    return supported ? Reflect.metadata(k, v) : undefined;
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.CheerioHookService = void 0;
|
|
13
|
+
const common_1 = require("@nestjs/common");
|
|
14
|
+
const cheerio_logger_service_1 = require("./cheerio.logger.service");
|
|
15
|
+
/**
 * Supplies the default navigation hooks: one that logs each URL before it is
 * requested and one that logs the response status afterwards.
 */
let CheerioHookService = class CheerioHookService {
    /**
     * @param logger Logger that receives the hook messages.
     */
    constructor(logger) {
        this.logger = logger;
    }
    /**
     * @returns A one-element array holding the hook that logs the request URL.
     */
    getPreNavigationHooks() {
        return [
            (context) => {
                const { url } = context.request;
                this.logger.log(`🔎 ${url}`);
            },
        ];
    }
    /**
     * @returns A one-element array holding the hook that logs
     *   `<icon> <statusCode> <url>` at a level chosen by the status code.
     */
    getPostNavigationHooks() {
        return [
            ({ request, response }) => {
                const { url } = request;
                const { statusCode } = response;
                const { HttpStatus } = common_1;
                // [status code, logger method, icon]; the first match wins.
                const outcomes = [
                    [HttpStatus.OK, 'log', '✅'],
                    [HttpStatus.BAD_REQUEST, 'warn', '⚠️'],
                    [HttpStatus.UNAUTHORIZED, 'warn', '⚠️'],
                    [HttpStatus.FORBIDDEN, 'warn', '⚠️'],
                    [HttpStatus.TOO_MANY_REQUESTS, 'warn', '⛔'],
                    [HttpStatus.INTERNAL_SERVER_ERROR, 'error', '💥'],
                    [HttpStatus.BAD_GATEWAY, 'error', '🚧'],
                    [HttpStatus.SERVICE_UNAVAILABLE, 'error', '🚧'],
                ];
                const matched = outcomes.find(([code]) => code === statusCode);
                // Any status not listed above is reported through log().
                const [, level, icon] = matched || [statusCode, 'log', '🤔'];
                this.logger[level](`${icon} ${statusCode} ${url}`);
            },
        ];
    }
};
|
|
64
|
+
exports.CheerioHookService = CheerioHookService;
|
|
65
|
+
exports.CheerioHookService = CheerioHookService = __decorate([
|
|
66
|
+
(0, common_1.Injectable)(),
|
|
67
|
+
__metadata("design:paramtypes", [cheerio_logger_service_1.CheerioLoggerService])
|
|
68
|
+
], CheerioHookService);
|
|
69
|
+
//# sourceMappingURL=cheerio.hook.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cheerio.hook.service.js","sourceRoot":"","sources":["../../../../src/modules/cheerio/services/cheerio.hook.service.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,2CAAwD;AAExD,qEAAgE;AAGzD,IAAM,kBAAkB,GAAxB,MAAM,kBAAkB;IAC7B,YAA6B,MAA4B;QAA5B,WAAM,GAAN,MAAM,CAAsB;IAAG,CAAC;IAE7D,qBAAqB;QACnB,OAAO;YACL,CAAC,EAAE,OAAO,EAAO,EAAE,EAAE;gBACnB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;YACvC,CAAC;SACF,CAAC;IACJ,CAAC;IAED,sBAAsB;QACpB,OAAO;YACL,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAO,EAAE,EAAE;gBAC7B,MAAM,EAAE,GAAG,EAAE,GAAG,OAAO,CAAC;gBACxB,MAAM,EAAE,UAAU,EAAE,GAAG,QAAQ,CAAC;gBAEhC,QAAQ,UAAU,EAAE,CAAC;oBACnB,KAAK,mBAAU,CAAC,EAAE;wBAChB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC1C,MAAM;oBACR,KAAK,mBAAU,CAAC,WAAW;wBACzB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC5C,MAAM;oBACR,KAAK,mBAAU,CAAC,YAAY;wBAC1B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC5C,MAAM;oBACR,KAAK,mBAAU,CAAC,SAAS;wBACvB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC5C,MAAM;oBACR,KAAK,mBAAU,CAAC,iBAAiB;wBAC/B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC3C,MAAM;oBACR,KAAK,mBAAU,CAAC,qBAAqB;wBACnC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC7C,MAAM;oBACR,KAAK,mBAAU,CAAC,WAAW;wBACzB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC7C,MAAM;oBACR,KAAK,mBAAU,CAAC,mBAAmB;wBACjC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC7C,MAAM;oBACR;wBACE,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC3C,MAAM;gBACV,CAAC;YACH,CAAC;SACF,CAAC;IACJ,CAAC;CACF,CAAA;AAjDY,gDAAkB;6BAAlB,kBAAkB;IAD9B,IAAA,mBAAU,GAAE;qCAE0B,6CAAoB;GAD9C,kBAAkB,CAiD9B"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { LoggerService } from '@nestjs/common';
|
|
2
|
+
/**
 * Injectable `LoggerService` implementation that forwards every call to an
 * internal NestJS logger created in the constructor.
 */
export declare class CheerioLoggerService implements LoggerService {
|
|
3
|
+
private readonly logger;
|
|
4
|
+
constructor();
|
|
5
|
+
log(message: any, ...optionalParams: any[]): any;
|
|
6
|
+
debug(message: any, ...optionalParams: any[]): any;
|
|
7
|
+
verbose(message: any, ...optionalParams: any[]): any;
|
|
8
|
+
warn(message: any, ...optionalParams: any[]): any;
|
|
9
|
+
error(message: any, ...optionalParams: any[]): any;
|
|
10
|
+
/** Declared optional here; the compiled class in this package always defines it. */
fatal?(message: any, ...optionalParams: any[]): any;
|
|
11
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted helper that applies a list of decorators.
 *
 * Called with two arguments it decorates a class (`target`); with a `key` it
 * decorates a member, and with a fourth argument the resulting descriptor is
 * written back onto `target`. Decorators run from last to first. When
 * `Reflect.decorate` exists it is used instead of the manual loop.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    } else {
        // A literal `null` descriptor means "look it up on the target".
        if (desc === null) {
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    } else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (!decorator) continue;
            var produced;
            if (argCount < 3) produced = decorator(result);
            else if (argCount > 3) produced = decorator(target, key, result);
            else produced = decorator(target, key);
            // A decorator returning nothing leaves the current result in place.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
/**
 * TypeScript-emitted helper: builds a metadata decorator through
 * `Reflect.metadata` when that function is available, and returns `undefined`
 * otherwise.
 */
var __metadata = (this && this.__metadata) || function (k, v) {
    var supported = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    return supported ? Reflect.metadata(k, v) : undefined;
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.CheerioLoggerService = void 0;
|
|
13
|
+
const common_1 = require("@nestjs/common");
|
|
14
|
+
const cheerio_service_1 = require("../cheerio.service");
|
|
15
|
+
/**
 * Logger wrapper for the Cheerio module. Every method forwards its arguments
 * unchanged to an internal NestJS `Logger` and returns nothing.
 */
let CheerioLoggerService = class CheerioLoggerService {
    /**
     * Creates the internal logger, using the CheerioService class name as its
     * context and enabling the `timestamp` option.
     */
    constructor() {
        const { name: context } = cheerio_service_1.CheerioService;
        this.logger = new common_1.Logger(context, { timestamp: true });
    }
    /** Forwards to `Logger.log`. */
    log(message, ...optionalParams) {
        const { logger } = this;
        logger.log(message, ...optionalParams);
    }
    /** Forwards to `Logger.debug`. */
    debug(message, ...optionalParams) {
        const { logger } = this;
        logger.debug(message, ...optionalParams);
    }
    /** Forwards to `Logger.verbose`. */
    verbose(message, ...optionalParams) {
        const { logger } = this;
        logger.verbose(message, ...optionalParams);
    }
    /** Forwards to `Logger.warn`. */
    warn(message, ...optionalParams) {
        const { logger } = this;
        logger.warn(message, ...optionalParams);
    }
    /** Forwards to `Logger.error`. */
    error(message, ...optionalParams) {
        const { logger } = this;
        logger.error(message, ...optionalParams);
    }
    /** Forwards to `Logger.fatal`. */
    fatal(message, ...optionalParams) {
        const { logger } = this;
        logger.fatal(message, ...optionalParams);
    }
};
|
|
39
|
+
exports.CheerioLoggerService = CheerioLoggerService;
|
|
40
|
+
exports.CheerioLoggerService = CheerioLoggerService = __decorate([
|
|
41
|
+
(0, common_1.Injectable)(),
|
|
42
|
+
__metadata("design:paramtypes", [])
|
|
43
|
+
], CheerioLoggerService);
|
|
44
|
+
//# sourceMappingURL=cheerio.logger.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cheerio.logger.service.js","sourceRoot":"","sources":["../../../../src/modules/cheerio/services/cheerio.logger.service.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,2CAAmE;AAEnE,wDAAoD;AAG7C,IAAM,oBAAoB,GAA1B,MAAM,oBAAoB;IAE/B;QACE,MAAM,OAAO,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;QACpC,IAAI,CAAC,MAAM,GAAG,IAAI,eAAM,CAAC,gCAAc,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,GAAG,CAAC,OAAY,EAAE,GAAG,cAAqB;QACxC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,OAAY,EAAE,GAAG,cAAqB;QAC1C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,CAAC,OAAY,EAAE,GAAG,cAAqB;QAC5C,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAClD,CAAC;IAED,IAAI,CAAC,OAAY,EAAE,GAAG,cAAqB;QACzC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAC/C,CAAC;IAED,KAAK,CAAC,OAAY,EAAE,GAAG,cAAqB;QAC1C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAChD,CAAC;IAED,KAAK,CAAE,OAAY,EAAE,GAAG,cAAqB;QAC3C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAChD,CAAC;CACF,CAAA;AA9BY,oDAAoB;+BAApB,oBAAoB;IADhC,IAAA,mBAAU,GAAE;;GACA,oBAAoB,CA8BhC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export declare const MIN_CONCURRENCY = "CRAWLEE_PLAYWRIGHT_MIN_CONCURRENCY";
|
|
2
|
+
export declare const MAX_CONCURRENCY = "CRAWLEE_PLAYWRIGHT_MAX_CONCURRENCY";
|
|
3
|
+
export declare const MAX_REQUEST_RETRIES = "CRAWLEE_PLAYWRIGHT_MAX_REQUEST_RETRIES";
|
|
4
|
+
export declare const TIMEOUT_SECS = "CRAWLEE_PLAYWRIGHT_TIMEOUT_SECS";
|
|
5
|
+
export declare const MAX_REQUESTS = "CRAWLEE_PLAYWRIGHT_MAX_REQUESTS";
|
|
6
|
+
export declare const INITIAL_PAGE = "CRAWLEE_PLAYWRIGHT_INITIAL_PAGE";
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.INITIAL_PAGE = exports.MAX_REQUESTS = exports.TIMEOUT_SECS = exports.MAX_REQUEST_RETRIES = exports.MAX_CONCURRENCY = exports.MIN_CONCURRENCY = void 0;
|
|
4
|
+
exports.MIN_CONCURRENCY = 'CRAWLEE_PLAYWRIGHT_MIN_CONCURRENCY';
|
|
5
|
+
exports.MAX_CONCURRENCY = 'CRAWLEE_PLAYWRIGHT_MAX_CONCURRENCY';
|
|
6
|
+
exports.MAX_REQUEST_RETRIES = 'CRAWLEE_PLAYWRIGHT_MAX_REQUEST_RETRIES';
|
|
7
|
+
exports.TIMEOUT_SECS = 'CRAWLEE_PLAYWRIGHT_TIMEOUT_SECS';
|
|
8
|
+
exports.MAX_REQUESTS = 'CRAWLEE_PLAYWRIGHT_MAX_REQUESTS';
|
|
9
|
+
exports.INITIAL_PAGE = 'CRAWLEE_PLAYWRIGHT_INITIAL_PAGE';
|
|
10
|
+
//# sourceMappingURL=playwright.env.constant.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.env.constant.js","sourceRoot":"","sources":["../../../../src/modules/playwright/constant/playwright.env.constant.ts"],"names":[],"mappings":";;;AAAa,QAAA,eAAe,GAAG,oCAAoC,CAAC;AACvD,QAAA,eAAe,GAAG,oCAAoC,CAAC;AACvD,QAAA,mBAAmB,GAAG,wCAAwC,CAAC;AAC/D,QAAA,YAAY,GAAG,iCAAiC,CAAC;AACjD,QAAA,YAAY,GAAG,iCAAiC,CAAC;AACjD,QAAA,YAAY,GAAG,iCAAiC,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export declare const PlaywrightEnvConfig: () => {
|
|
2
|
+
CRAWLEE_PLAYWRIGHT_MAX_CONCURRENCY: number;
|
|
3
|
+
CRAWLEE_PLAYWRIGHT_MIN_CONCURRENCY: number;
|
|
4
|
+
CRAWLEE_PLAYWRIGHT_MAX_REQUEST_RETRIES: number;
|
|
5
|
+
CRAWLEE_PLAYWRIGHT_TIMEOUT_SECS: number;
|
|
6
|
+
CRAWLEE_PLAYWRIGHT_MAX_REQUESTS: number;
|
|
7
|
+
CRAWLEE_PLAYWRIGHT_INITIAL_PAGE: number;
|
|
8
|
+
};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.PlaywrightEnvConfig = void 0;
|
|
37
|
+
const dotenv = __importStar(require("dotenv"));
|
|
38
|
+
dotenv.config();
|
|
39
|
+
/**
 * Reads one numeric setting from `process.env`.
 *
 * Returns `fallback` when the variable is unset, blank, or does not parse to a
 * number, so callers never receive `NaN`.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is missing or invalid.
 * @returns {number} The parsed value or the fallback.
 */
const readNumericEnv = (name, fallback) => {
    const raw = process.env[name];
    // `Number('')` and `Number('  ')` are 0, which would silently hide an unset value.
    if (raw === undefined || raw.trim() === '') {
        return fallback;
    }
    const parsed = Number(raw);
    return Number.isNaN(parsed) ? fallback : parsed;
};
/**
 * Configuration factory for the Playwright crawler settings.
 *
 * Each key is read from the environment variable of the same name. The
 * fallbacks are the same defaults declared by `PlaywrightConfigValidationSchema`,
 * so the factory produces usable numbers even when that schema is not applied.
 *
 * @returns {Record<string, number>} One numeric entry per `CRAWLEE_PLAYWRIGHT_*` setting.
 */
const PlaywrightEnvConfig = () => ({
    CRAWLEE_PLAYWRIGHT_MAX_CONCURRENCY: readNumericEnv('CRAWLEE_PLAYWRIGHT_MAX_CONCURRENCY', 5),
    CRAWLEE_PLAYWRIGHT_MIN_CONCURRENCY: readNumericEnv('CRAWLEE_PLAYWRIGHT_MIN_CONCURRENCY', 1),
    CRAWLEE_PLAYWRIGHT_MAX_REQUEST_RETRIES: readNumericEnv('CRAWLEE_PLAYWRIGHT_MAX_REQUEST_RETRIES', 10),
    CRAWLEE_PLAYWRIGHT_TIMEOUT_SECS: readNumericEnv('CRAWLEE_PLAYWRIGHT_TIMEOUT_SECS', 180),
    CRAWLEE_PLAYWRIGHT_MAX_REQUESTS: readNumericEnv('CRAWLEE_PLAYWRIGHT_MAX_REQUESTS', 100000),
    CRAWLEE_PLAYWRIGHT_INITIAL_PAGE: readNumericEnv('CRAWLEE_PLAYWRIGHT_INITIAL_PAGE', 1),
});
|
|
47
|
+
exports.PlaywrightEnvConfig = PlaywrightEnvConfig;
|
|
48
|
+
//# sourceMappingURL=playwright.env.config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.env.config.js","sourceRoot":"","sources":["../../../../src/modules/playwright/env/playwright.env.config.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AAEjC,MAAM,CAAC,MAAM,EAAE,CAAC;AACT,MAAM,mBAAmB,GAAG,GAAG,EAAE,CAAC,CAAC;IACxC,kCAAkC,EAAE,MAAM,CACxC,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAC/C;IACD,kCAAkC,EAAE,MAAM,CACxC,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAC/C;IACD,sCAAsC,EAAE,MAAM,CAC5C,OAAO,CAAC,GAAG,CAAC,sCAAsC,CACnD;IACD,+BAA+B,EAAE,MAAM,CACrC,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAC5C;IACD,+BAA+B,EAAE,MAAM,CACrC,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAC5C;IACD,+BAA+B,EAAE,MAAM,CACrC,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAC5C;CACF,CAAC,CAAC;AAnBU,QAAA,mBAAmB,uBAmB7B"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.PlaywrightConfigValidationSchema = void 0;
|
|
37
|
+
const joi = __importStar(require("joi"));
|
|
38
|
+
exports.PlaywrightConfigValidationSchema = joi.object({
|
|
39
|
+
CRAWLEE_PLAYWRIGHT_MIN_CONCURRENCY: joi.number().optional().default(1),
|
|
40
|
+
CRAWLEE_PLAYWRIGHT_MAX_CONCURRENCY: joi.number().optional().default(5),
|
|
41
|
+
CRAWLEE_PLAYWRIGHT_MAX_REQUEST_RETRIES: joi.number().optional().default(10),
|
|
42
|
+
CRAWLEE_PLAYWRIGHT_TIMEOUT_SECS: joi.number().optional().default(180),
|
|
43
|
+
CRAWLEE_PLAYWRIGHT_MAX_REQUESTS: joi.number().optional().default(100000),
|
|
44
|
+
CRAWLEE_PLAYWRIGHT_INITIAL_PAGE: joi.number().optional().default(1),
|
|
45
|
+
});
|
|
46
|
+
//# sourceMappingURL=playwright.env.validation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.env.validation.js","sourceRoot":"","sources":["../../../../src/modules/playwright/env/playwright.env.validation.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,yCAA2B;AAEd,QAAA,gCAAgC,GAAG,GAAG,CAAC,MAAM,CAAC;IACzD,kCAAkC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,kCAAkC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,sCAAsC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;IAC3E,+BAA+B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC;IACrE,+BAA+B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC;IACxE,+BAA+B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;CACpE,CAAC,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
3
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
4
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
5
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
6
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
7
|
+
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.PlaywrightModule = void 0;
|
|
10
|
+
const common_1 = require("@nestjs/common");
|
|
11
|
+
const config_1 = require("@nestjs/config");
|
|
12
|
+
const playwright_service_1 = require("./playwright.service");
|
|
13
|
+
const playwright_env_config_1 = require("./env/playwright.env.config");
|
|
14
|
+
const playwright_logger_service_1 = require("./services/playwright.logger.service");
|
|
15
|
+
const playwright_hook_service_1 = require("./services/playwright.hook.service");
|
|
16
|
+
let PlaywrightModule = class PlaywrightModule {
|
|
17
|
+
};
|
|
18
|
+
exports.PlaywrightModule = PlaywrightModule;
|
|
19
|
+
exports.PlaywrightModule = PlaywrightModule = __decorate([
|
|
20
|
+
(0, common_1.Module)({
|
|
21
|
+
imports: [config_1.ConfigModule.forFeature(playwright_env_config_1.PlaywrightEnvConfig)],
|
|
22
|
+
providers: [
|
|
23
|
+
playwright_service_1.PlaywrightService,
|
|
24
|
+
playwright_logger_service_1.PlaywrightLoggerService,
|
|
25
|
+
playwright_hook_service_1.PlaywrightHookService,
|
|
26
|
+
],
|
|
27
|
+
exports: [playwright_service_1.PlaywrightService],
|
|
28
|
+
})
|
|
29
|
+
], PlaywrightModule);
|
|
30
|
+
//# sourceMappingURL=playwright.module.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.module.js","sourceRoot":"","sources":["../../../src/modules/playwright/playwright.module.ts"],"names":[],"mappings":";;;;;;;;;AAAA,2CAAwC;AACxC,2CAA8C;AAE9C,6DAAyD;AACzD,uEAAkE;AAClE,oFAA+E;AAC/E,gFAA2E;AAWpE,IAAM,gBAAgB,GAAtB,MAAM,gBAAgB;CAAG,CAAA;AAAnB,4CAAgB;2BAAhB,gBAAgB;IAT5B,IAAA,eAAM,EAAC;QACN,OAAO,EAAE,CAAC,qBAAY,CAAC,UAAU,CAAC,2CAAmB,CAAC,CAAC;QACvD,SAAS,EAAE;YACT,sCAAiB;YACjB,mDAAuB;YACvB,+CAAqB;SACtB;QACD,OAAO,EAAE,CAAC,sCAAiB,CAAC;KAC7B,CAAC;GACW,gBAAgB,CAAG"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { ConfigService } from '@nestjs/config';
|
|
2
|
+
import { PlaywrightCrawler } from '@crawlee/playwright';
|
|
3
|
+
import { PlaywrightCrawlerOptions } from '@crawlee/playwright';
|
|
4
|
+
import { Browser } from 'playwright';
|
|
5
|
+
import { PlaywrightLoggerService } from './services/playwright.logger.service';
|
|
6
|
+
export declare class PlaywrightService {
|
|
7
|
+
private readonly config;
|
|
8
|
+
private readonly logger;
|
|
9
|
+
private readonly maxRequests;
|
|
10
|
+
private readonly timeoutSecs;
|
|
11
|
+
private readonly maxConcurrency;
|
|
12
|
+
private readonly minConcurrency;
|
|
13
|
+
private readonly maxRequestRetries;
|
|
14
|
+
private readonly browserArgs;
|
|
15
|
+
constructor(config: ConfigService, logger: PlaywrightLoggerService);
|
|
16
|
+
create(options: PlaywrightCrawlerOptions): PlaywrightCrawler;
|
|
17
|
+
getBrowser(): Promise<Browser>;
|
|
18
|
+
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
3
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
4
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
5
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
6
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
7
|
+
};
|
|
8
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
9
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.PlaywrightService = void 0;
|
|
13
|
+
const common_1 = require("@nestjs/common");
|
|
14
|
+
const config_1 = require("@nestjs/config");
|
|
15
|
+
const playwright_1 = require("@crawlee/playwright");
|
|
16
|
+
const playwright_2 = require("playwright");
|
|
17
|
+
const playwright_logger_service_1 = require("./services/playwright.logger.service");
|
|
18
|
+
const playwright_env_constant_1 = require("./constant/playwright.env.constant");
|
|
19
|
+
const playwright_env_constant_2 = require("./constant/playwright.env.constant");
|
|
20
|
+
const playwright_env_constant_3 = require("./constant/playwright.env.constant");
|
|
21
|
+
const playwright_env_constant_4 = require("./constant/playwright.env.constant");
|
|
22
|
+
const playwright_env_constant_5 = require("./constant/playwright.env.constant");
|
|
23
|
+
/**
 * Creates Crawlee `PlaywrightCrawler` instances and plain Playwright browsers
 * using settings read from the injected config service.
 */
let PlaywrightService = class PlaywrightService {
    /**
     * Stores the collaborators and reads the crawler settings once, at construction.
     *
     * @param config - Config service queried with the `CRAWLEE_PLAYWRIGHT_*` keys.
     * @param logger - Logger used for the creation messages.
     */
    constructor(config, logger) {
        this.config = config;
        this.logger = logger;
        // Command-line flags handed to the browser on every launch.
        this.browserArgs = [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-accelerated-2d-canvas',
            '--no-first-run',
            '--no-zygote',
            '--disable-gpu',
        ];
        const readSetting = (key) => this.config.get(key);
        this.maxConcurrency = readSetting(playwright_env_constant_1.MAX_CONCURRENCY);
        this.minConcurrency = readSetting(playwright_env_constant_2.MIN_CONCURRENCY);
        this.maxRequestRetries = readSetting(playwright_env_constant_3.MAX_REQUEST_RETRIES);
        this.timeoutSecs = readSetting(playwright_env_constant_4.TIMEOUT_SECS);
        this.maxRequests = readSetting(playwright_env_constant_5.MAX_REQUESTS);
    }
    /**
     * Builds a `PlaywrightCrawler` from the service defaults overlaid with `options`.
     * The merge is shallow: a top-level key present in `options` replaces the
     * default for that key entirely.
     *
     * @param options - Caller-supplied crawler options.
     * @returns The new crawler instance.
     */
    create(options) {
        this.logger.log('🚀 Creating Playwright Crawler instance');
        const launchContext = {
            launchOptions: { args: this.browserArgs },
            useIncognitoPages: true,
        };
        const browserPoolOptions = {
            useFingerprints: true,
            retireBrowserAfterPageCount: 50,
        };
        const instance = new playwright_1.PlaywrightCrawler({
            maxRequestsPerCrawl: this.maxRequests,
            // The same timeout is applied to the request handler and to navigation.
            requestHandlerTimeoutSecs: this.timeoutSecs,
            navigationTimeoutSecs: this.timeoutSecs,
            maxConcurrency: this.maxConcurrency,
            minConcurrency: this.minConcurrency,
            maxRequestRetries: this.maxRequestRetries,
            useSessionPool: true,
            headless: true,
            launchContext,
            browserPoolOptions,
            // Spread last so caller options win over the defaults above.
            ...options,
        });
        this.logger.log('✅ Playwright Crawler instance created successfully');
        return instance;
    }
    /**
     * Launches a headless Chromium browser directly through Playwright,
     * with the same launch flags the crawler uses.
     *
     * @returns A promise resolving to the launched browser.
     */
    async getBrowser() {
        this.logger.log('🚀 Creating direct Playwright browser instance');
        const launchSettings = { headless: true, args: this.browserArgs };
        const instance = await playwright_2.chromium.launch(launchSettings);
        this.logger.log('✅ Playwright Browser instance created successfully');
        return instance;
    }
};
|
|
78
|
+
exports.PlaywrightService = PlaywrightService;
|
|
79
|
+
exports.PlaywrightService = PlaywrightService = __decorate([
|
|
80
|
+
(0, common_1.Injectable)(),
|
|
81
|
+
__metadata("design:paramtypes", [config_1.ConfigService,
|
|
82
|
+
playwright_logger_service_1.PlaywrightLoggerService])
|
|
83
|
+
], PlaywrightService);
|
|
84
|
+
//# sourceMappingURL=playwright.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.service.js","sourceRoot":"","sources":["../../../src/modules/playwright/playwright.service.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,2CAA4C;AAC5C,2CAA+C;AAE/C,oDAAwD;AAExD,2CAA+C;AAE/C,oFAA+E;AAE/E,gFAAqE;AACrE,gFAAqE;AACrE,gFAAyE;AACzE,gFAAkE;AAClE,gFAAkE;AAG3D,IAAM,iBAAiB,GAAvB,MAAM,iBAAiB;IAgB5B,YACmB,MAAqB,EACrB,MAA+B;QAD/B,WAAM,GAAN,MAAM,CAAe;QACrB,WAAM,GAAN,MAAM,CAAyB;QAZjC,gBAAW,GAAG;YAC7B,cAAc;YACd,0BAA0B;YAC1B,yBAAyB;YACzB,iCAAiC;YACjC,gBAAgB;YAChB,aAAa;YACb,eAAe;SAChB,CAAC;QAMA,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,yCAAe,CAAE,CAAC;QAChE,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,yCAAe,CAAE,CAAC;QAChE,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,6CAAmB,CAAE,CAAC;QACvE,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,sCAAY,CAAE,CAAC;QAC1D,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAS,sCAAY,CAAE,CAAC;IAC5D,CAAC;IAED,MAAM,CAAC,OAAiC;QACtC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;QAC3D,MAAM,cAAc,GAA6B;YAC/C,mBAAmB,EAAE,IAAI,CAAC,WAAW;YACrC,yBAAyB,EAAE,IAAI,CAAC,WAAW;YAC3C,qBAAqB,EAAE,IAAI,CAAC,WAAW;YACvC,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,iBAAiB,EAAE,IAAI,CAAC,iBAAiB;YACzC,cAAc,EAAE,IAAI;YACpB,QAAQ,EAAE,IAAI;YACd,aAAa,EAAE;gBACb,aAAa,EAAE;oBACb,IAAI,EAAE,IAAI,CAAC,WAAW;iBACvB;gBACD,iBAAiB,EAAE,IAAI;aACxB;YACD,kBAAkB,EAAE;gBAClB,eAAe,EAAE,IAAI;gBACrB,2BAA2B,EAAE,EAAE;aAChC;SACF,CAAC;QACF,MAAM,OAAO,GAAG,IAAI,8BAAiB,CAAC,EAAE,GAAG,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;QACzE,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;QACtE,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;QAClE,MAAM,OAAO,GAAG,MAAM,qBAAQ,CAAC,MAAM,CAAC;YACpC,QAAQ,EAAE,IAAI;YACd,IAAI,EAAE,IAAI,CAAC,WAAW;SACvB,CAAC,CAAC;QACH,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;QACtE,OAAO,OAAO,CAAC;IACjB,CAAC;CACF,CAAA;AA/DY,8CAAiB;4BAAjB,iBAAiB;IAD7B,IAAA,mBAAU,GAAE;qCAkBgB,sBAAa;QACb,mDAAuB;GAlBvC,iBAAiB,CA+D7B"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { PlaywrightLoggerService } from './playwright.logger.service';
|
|
2
|
+
import { PlaywrightHook } from '@crawlee/playwright';
|
|
3
|
+
export declare class PlaywrightHookService {
|
|
4
|
+
private readonly logger;
|
|
5
|
+
constructor(logger: PlaywrightLoggerService);
|
|
6
|
+
getPreNavigationHooks(): PlaywrightHook[];
|
|
7
|
+
getPostNavigationHooks(): PlaywrightHook[];
|
|
8
|
+
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
3
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
4
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
5
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
6
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
7
|
+
};
|
|
8
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
9
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.PlaywrightHookService = void 0;
|
|
13
|
+
const common_1 = require("@nestjs/common");
|
|
14
|
+
const playwright_logger_service_1 = require("./playwright.logger.service");
|
|
15
|
+
/**
 * Supplies navigation hooks for a Playwright crawler that log each request
 * and the HTTP status of its response.
 */
let PlaywrightHookService = class PlaywrightHookService {
    /**
     * @param logger - Logger that receives the per-request messages.
     */
    constructor(logger) {
        this.logger = logger;
    }
    /**
     * @returns Hooks to run before navigation; the single hook logs the request URL.
     */
    getPreNavigationHooks() {
        return [
            ({ request }) => {
                this.logger.log(`🔎 ${request.url}`);
            },
        ];
    }
    /**
     * @returns Hooks to run after navigation, in order: one that awaits the
     * context's `handleCloudflareChallenge`, then one that logs the response
     * status at a level chosen by status code.
     */
    getPostNavigationHooks() {
        return [
            async ({ handleCloudflareChallenge }) => {
                await handleCloudflareChallenge();
            },
            ({ request, response }) => {
                const { url } = request;
                // A navigation can finish without a response object; calling
                // `.status()` on it would throw a TypeError inside the hook.
                if (response == null) {
                    this.logger.warn(`⚠️ no response ${url}`);
                    return;
                }
                const statusCode = response.status();
                switch (statusCode) {
                    case common_1.HttpStatus.OK:
                        this.logger.log(`✅ ${statusCode} ${url}`);
                        break;
                    case common_1.HttpStatus.BAD_REQUEST:
                    case common_1.HttpStatus.UNAUTHORIZED:
                    case common_1.HttpStatus.FORBIDDEN:
                        this.logger.warn(`⚠️ ${statusCode} ${url}`);
                        break;
                    case common_1.HttpStatus.TOO_MANY_REQUESTS:
                        this.logger.warn(`⛔ ${statusCode} ${url}`);
                        break;
                    case common_1.HttpStatus.INTERNAL_SERVER_ERROR:
                        this.logger.error(`💥 ${statusCode} ${url}`);
                        break;
                    case common_1.HttpStatus.BAD_GATEWAY:
                    case common_1.HttpStatus.SERVICE_UNAVAILABLE:
                        this.logger.error(`🚧 ${statusCode} ${url}`);
                        break;
                    default:
                        // Any status without a dedicated arm is logged at the normal level.
                        this.logger.log(`🤔 ${statusCode} ${url}`);
                        break;
                }
            },
        ];
    }
};
|
|
67
|
+
exports.PlaywrightHookService = PlaywrightHookService;
|
|
68
|
+
exports.PlaywrightHookService = PlaywrightHookService = __decorate([
|
|
69
|
+
(0, common_1.Injectable)(),
|
|
70
|
+
__metadata("design:paramtypes", [playwright_logger_service_1.PlaywrightLoggerService])
|
|
71
|
+
], PlaywrightHookService);
|
|
72
|
+
//# sourceMappingURL=playwright.hook.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.hook.service.js","sourceRoot":"","sources":["../../../../src/modules/playwright/services/playwright.hook.service.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,2CAAwD;AAExD,2EAAsE;AAI/D,IAAM,qBAAqB,GAA3B,MAAM,qBAAqB;IAChC,YAA6B,MAA+B;QAA/B,WAAM,GAAN,MAAM,CAAyB;IAAG,CAAC;IAEhE,qBAAqB;QACnB,OAAO;YACL,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;YACvC,CAAC;SACF,CAAC;IACJ,CAAC;IAED,sBAAsB;QACpB,OAAO;YACL,KAAK,EAAE,EAAE,yBAAyB,EAAE,EAAE,EAAE;gBACtC,MAAM,yBAAyB,EAAE,CAAC;YACpC,CAAC;YACD,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE;gBACxB,MAAM,EAAE,GAAG,EAAE,GAAG,OAAO,CAAC;gBACxB,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;gBAErC,QAAQ,UAAwB,EAAE,CAAC;oBACjC,KAAK,mBAAU,CAAC,EAAE;wBAChB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC1C,MAAM;oBACR,KAAK,mBAAU,CAAC,WAAW;wBACzB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC5C,MAAM;oBACR,KAAK,mBAAU,CAAC,YAAY;wBAC1B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC5C,MAAM;oBACR,KAAK,mBAAU,CAAC,SAAS;wBACvB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC5C,MAAM;oBACR,KAAK,mBAAU,CAAC,iBAAiB;wBAC/B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC3C,MAAM;oBACR,KAAK,mBAAU,CAAC,qBAAqB;wBACnC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC7C,MAAM;oBACR,KAAK,mBAAU,CAAC,WAAW;wBACzB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC7C,MAAM;oBACR,KAAK,mBAAU,CAAC,mBAAmB;wBACjC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC7C,MAAM;oBACR;wBACE,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;wBAC3C,MAAM;gBACV,CAAC;YACH,CAAC;SACF,CAAC;IACJ,CAAC;CACF,CAAA;AApDY,sDAAqB;gCAArB,qBAAqB;IADjC,IAAA,mBAAU,GAAE;qCAE0B,mDAAuB;GADjD,qBAAqB,CAoDjC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { LoggerService } from '@nestjs/common';
|
|
2
|
+
export declare class PlaywrightLoggerService implements LoggerService {
|
|
3
|
+
private readonly logger;
|
|
4
|
+
constructor();
|
|
5
|
+
log(message: any, ...optionalParams: any[]): any;
|
|
6
|
+
debug(message: any, ...optionalParams: any[]): any;
|
|
7
|
+
verbose(message: any, ...optionalParams: any[]): any;
|
|
8
|
+
warn(message: any, ...optionalParams: any[]): any;
|
|
9
|
+
error(message: any, ...optionalParams: any[]): any;
|
|
10
|
+
fatal?(message: any, ...optionalParams: any[]): any;
|
|
11
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
3
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
4
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
5
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
6
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
7
|
+
};
|
|
8
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
9
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.PlaywrightLoggerService = void 0;
|
|
13
|
+
const common_1 = require("@nestjs/common");
|
|
14
|
+
const playwright_service_1 = require("../playwright.service");
|
|
15
|
+
/**
 * Logger wrapper that forwards every call to an internal `Logger` whose
 * context is the `PlaywrightService` class name, with timestamps enabled.
 */
let PlaywrightLoggerService = class PlaywrightLoggerService {
    constructor() {
        const context = playwright_service_1.PlaywrightService.name;
        this.logger = new common_1.Logger(context, { timestamp: true });
    }
    /** Forwards to the wrapped logger's `log`. */
    log(message, ...optionalParams) {
        const forwarded = [message, ...optionalParams];
        this.logger.log(...forwarded);
    }
    /** Forwards to the wrapped logger's `debug`. */
    debug(message, ...optionalParams) {
        const forwarded = [message, ...optionalParams];
        this.logger.debug(...forwarded);
    }
    /** Forwards to the wrapped logger's `verbose`. */
    verbose(message, ...optionalParams) {
        const forwarded = [message, ...optionalParams];
        this.logger.verbose(...forwarded);
    }
    /** Forwards to the wrapped logger's `warn`. */
    warn(message, ...optionalParams) {
        const forwarded = [message, ...optionalParams];
        this.logger.warn(...forwarded);
    }
    /** Forwards to the wrapped logger's `error`. */
    error(message, ...optionalParams) {
        const forwarded = [message, ...optionalParams];
        this.logger.error(...forwarded);
    }
    /** Forwards to the wrapped logger's `fatal`. */
    fatal(message, ...optionalParams) {
        const forwarded = [message, ...optionalParams];
        this.logger.fatal(...forwarded);
    }
};
|
|
39
|
+
exports.PlaywrightLoggerService = PlaywrightLoggerService;
|
|
40
|
+
exports.PlaywrightLoggerService = PlaywrightLoggerService = __decorate([
|
|
41
|
+
(0, common_1.Injectable)(),
|
|
42
|
+
__metadata("design:paramtypes", [])
|
|
43
|
+
], PlaywrightLoggerService);
|
|
44
|
+
//# sourceMappingURL=playwright.logger.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.logger.service.js","sourceRoot":"","sources":["../../../../src/modules/playwright/services/playwright.logger.service.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,2CAAmE;AAEnE,8DAA0D;AAGnD,IAAM,uBAAuB,GAA7B,MAAM,uBAAuB;IAElC;QACE,MAAM,OAAO,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;QACpC,IAAI,CAAC,MAAM,GAAG,IAAI,eAAM,CAAC,sCAAiB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC5D,CAAC;IAED,GAAG,CAAC,OAAY,EAAE,GAAG,cAAqB;QACxC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,OAAY,EAAE,GAAG,cAAqB;QAC1C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,CAAC,OAAY,EAAE,GAAG,cAAqB;QAC5C,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAClD,CAAC;IAED,IAAI,CAAC,OAAY,EAAE,GAAG,cAAqB;QACzC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAC/C,CAAC;IAED,KAAK,CAAC,OAAY,EAAE,GAAG,cAAqB;QAC1C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAChD,CAAC;IAED,KAAK,CAAE,OAAY,EAAE,GAAG,cAAqB;QAC3C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,cAAc,CAAC,CAAC;IAChD,CAAC;CACF,CAAA;AA9BY,0DAAuB;kCAAvB,uBAAuB;IADnC,IAAA,mBAAU,GAAE;;GACA,uBAAuB,CA8BnC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@rafikidota/scoutee",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Sometimes, the best way to solve your own problems is to help someone else.",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "git+https://github.com/rafikidota/scoutee.git"
|
|
8
|
+
},
|
|
9
|
+
"author": "rafiki <71207415+rafikidota@users.noreply.github.com>",
|
|
10
|
+
"license": "MIT",
|
|
11
|
+
"publishConfig": {
|
|
12
|
+
"access": "public"
|
|
13
|
+
},
|
|
14
|
+
"main": "./dist/index.js",
|
|
15
|
+
"module": "./dist/index.js",
|
|
16
|
+
"types": "./dist/index.d.ts",
|
|
17
|
+
"devDependencies": {
|
|
18
|
+
"@crawlee/playwright": "3.15.3",
|
|
19
|
+
"@eslint/eslintrc": "3.3.3",
|
|
20
|
+
"@eslint/js": "9.39.1",
|
|
21
|
+
"@nestjs/common": "11.1.9",
|
|
22
|
+
"@nestjs/config": "4.0.2",
|
|
23
|
+
"@nestjs/core": "11.1.9",
|
|
24
|
+
"@types/node": "24.10.2",
|
|
25
|
+
"@typescript-eslint/eslint-plugin": "8.49.0",
|
|
26
|
+
"@typescript-eslint/parser": "8.49.0",
|
|
27
|
+
"crawlee": "3.15.3",
|
|
28
|
+
"dotenv": "17.2.3",
|
|
29
|
+
"eslint": "9.39.1",
|
|
30
|
+
"eslint-config-prettier": "10.1.8",
|
|
31
|
+
"eslint-plugin-import": "2.32.0",
|
|
32
|
+
"eslint-plugin-prettier": "5.5.4",
|
|
33
|
+
"globals": "16.5.0",
|
|
34
|
+
"husky": "9.1.7",
|
|
35
|
+
"joi": "18.0.2",
|
|
36
|
+
"lint-staged": "16.2.7",
|
|
37
|
+
"playwright": "1.57.0",
|
|
38
|
+
"prettier": "3.7.4",
|
|
39
|
+
"rimraf": "6.1.2",
|
|
40
|
+
"rxjs": "7.8.2",
|
|
41
|
+
"typescript": "5.9.3",
|
|
42
|
+
"typescript-eslint": "8.49.0"
|
|
43
|
+
},
|
|
44
|
+
"keywords": [
|
|
45
|
+
"nestjs",
|
|
46
|
+
"crawlee",
|
|
47
|
+
"cheerio",
|
|
48
|
+
"playwright",
|
|
49
|
+
"scraping"
|
|
50
|
+
],
|
|
51
|
+
"scripts": {
|
|
52
|
+
"prebuild": "rimraf -rf dist",
|
|
53
|
+
"build": "tsc -p tsconfig.json",
|
|
54
|
+
"lint": "npx eslint --debug . --fix",
|
|
55
|
+
"format": "prettier --write \"**/*.ts\""
|
|
56
|
+
}
|
|
57
|
+
}
|