crawlee-one 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/dist/cjs/cli/cli.d.ts +1 -0
- package/dist/cjs/cli/cli.js +61 -0
- package/dist/cjs/cli/cli.js.map +1 -0
- package/dist/cjs/cli/index.d.ts +2 -0
- package/dist/cjs/cli/index.js +6 -0
- package/dist/cjs/cli/index.js.map +1 -0
- package/dist/cjs/index.d.ts +24 -0
- package/dist/cjs/index.js +43 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/lib/actions/dom.d.ts +102 -0
- package/dist/cjs/lib/actions/dom.js +743 -0
- package/dist/cjs/lib/actions/dom.js.map +1 -0
- package/dist/cjs/lib/actions/domUtils.d.ts +42 -0
- package/dist/cjs/lib/actions/domUtils.js +126 -0
- package/dist/cjs/lib/actions/domUtils.js.map +1 -0
- package/dist/cjs/lib/actions/page.d.ts +69 -0
- package/dist/cjs/lib/actions/page.js +205 -0
- package/dist/cjs/lib/actions/page.js.map +1 -0
- package/dist/cjs/lib/actions/scrapeListing.d.ts +78 -0
- package/dist/cjs/lib/actions/scrapeListing.js +242 -0
- package/dist/cjs/lib/actions/scrapeListing.js.map +1 -0
- package/dist/cjs/lib/actor/actor.d.ts +90 -0
- package/dist/cjs/lib/actor/actor.js +306 -0
- package/dist/cjs/lib/actor/actor.js.map +1 -0
- package/dist/cjs/lib/actor/types.d.ts +162 -0
- package/dist/cjs/lib/actor/types.js +3 -0
- package/dist/cjs/lib/actor/types.js.map +1 -0
- package/dist/cjs/lib/actor.d.ts +189 -0
- package/dist/cjs/lib/actor.js +225 -0
- package/dist/cjs/lib/actor.js.map +1 -0
- package/dist/cjs/lib/actorSpec.d.ts +20 -0
- package/dist/cjs/lib/actorSpec.js +3 -0
- package/dist/cjs/lib/actorSpec.js.map +1 -0
- package/dist/cjs/lib/config.d.ts +561 -0
- package/dist/cjs/lib/config.js +707 -0
- package/dist/cjs/lib/config.js.map +1 -0
- package/dist/cjs/lib/dataset/maxCount.d.ts +30 -0
- package/dist/cjs/lib/dataset/maxCount.js +55 -0
- package/dist/cjs/lib/dataset/maxCount.js.map +1 -0
- package/dist/cjs/lib/dataset/pushData.d.ts +123 -0
- package/dist/cjs/lib/dataset/pushData.js +182 -0
- package/dist/cjs/lib/dataset/pushData.js.map +1 -0
- package/dist/cjs/lib/dataset.d.ts +98 -0
- package/dist/cjs/lib/dataset.js +122 -0
- package/dist/cjs/lib/dataset.js.map +1 -0
- package/dist/cjs/lib/dom.d.ts +78 -0
- package/dist/cjs/lib/dom.js +243 -0
- package/dist/cjs/lib/dom.js.map +1 -0
- package/dist/cjs/lib/error/errorHandler.d.ts +112 -0
- package/dist/cjs/lib/error/errorHandler.js +164 -0
- package/dist/cjs/lib/error/errorHandler.js.map +1 -0
- package/dist/cjs/lib/error/sentry.d.ts +11 -0
- package/dist/cjs/lib/error/sentry.js +60 -0
- package/dist/cjs/lib/error/sentry.js.map +1 -0
- package/dist/cjs/lib/integrations/apify.d.ts +67 -0
- package/dist/cjs/lib/integrations/apify.js +106 -0
- package/dist/cjs/lib/integrations/apify.js.map +1 -0
- package/dist/cjs/lib/integrations/types.d.ts +274 -0
- package/dist/cjs/lib/integrations/types.js +3 -0
- package/dist/cjs/lib/integrations/types.js.map +1 -0
- package/dist/cjs/lib/io/dataset.d.ts +67 -0
- package/dist/cjs/lib/io/dataset.js +86 -0
- package/dist/cjs/lib/io/dataset.js.map +1 -0
- package/dist/cjs/lib/io/maxCount.d.ts +30 -0
- package/dist/cjs/lib/io/maxCount.js +55 -0
- package/dist/cjs/lib/io/maxCount.js.map +1 -0
- package/dist/cjs/lib/io/pushData.d.ts +124 -0
- package/dist/cjs/lib/io/pushData.js +193 -0
- package/dist/cjs/lib/io/pushData.js.map +1 -0
- package/dist/cjs/lib/io/pushRequests.d.ts +38 -0
- package/dist/cjs/lib/io/pushRequests.js +63 -0
- package/dist/cjs/lib/io/pushRequests.js.map +1 -0
- package/dist/cjs/lib/io/requestQueue.d.ts +28 -0
- package/dist/cjs/lib/io/requestQueue.js +40 -0
- package/dist/cjs/lib/io/requestQueue.js.map +1 -0
- package/dist/cjs/lib/log.d.ts +38 -0
- package/dist/cjs/lib/log.js +54 -0
- package/dist/cjs/lib/log.js.map +1 -0
- package/dist/cjs/lib/migrate/localMigrator.d.ts +10 -0
- package/dist/cjs/lib/migrate/localMigrator.js +57 -0
- package/dist/cjs/lib/migrate/localMigrator.js.map +1 -0
- package/dist/cjs/lib/migrate/localState.d.ts +7 -0
- package/dist/cjs/lib/migrate/localState.js +43 -0
- package/dist/cjs/lib/migrate/localState.js.map +1 -0
- package/dist/cjs/lib/migrate/types.d.ts +6 -0
- package/dist/cjs/lib/migrate/types.js +3 -0
- package/dist/cjs/lib/migrate/types.js.map +1 -0
- package/dist/cjs/lib/readme/readme.d.ts +65 -0
- package/dist/cjs/lib/readme/readme.js +534 -0
- package/dist/cjs/lib/readme/readme.js.map +1 -0
- package/dist/cjs/lib/readme/types.d.ts +260 -0
- package/dist/cjs/lib/readme/types.js +54 -0
- package/dist/cjs/lib/readme/types.js.map +1 -0
- package/dist/cjs/lib/router.d.ts +132 -0
- package/dist/cjs/lib/router.js +165 -0
- package/dist/cjs/lib/router.js.map +1 -0
- package/dist/cjs/lib/scraper/scrapeListing.d.ts +78 -0
- package/dist/cjs/lib/scraper/scrapeListing.js +242 -0
- package/dist/cjs/lib/scraper/scrapeListing.js.map +1 -0
- package/dist/cjs/lib/test/actor.d.ts +21 -0
- package/dist/cjs/lib/test/actor.js +56 -0
- package/dist/cjs/lib/test/actor.js.map +1 -0
- package/dist/cjs/lib/test/mockApifyClient.d.ts +32 -0
- package/dist/cjs/lib/test/mockApifyClient.js +176 -0
- package/dist/cjs/lib/test/mockApifyClient.js.map +1 -0
- package/dist/cjs/types.d.ts +31 -0
- package/dist/cjs/types.js +3 -0
- package/dist/cjs/types.js.map +1 -0
- package/dist/cjs/utils/async.d.ts +19 -0
- package/dist/cjs/utils/async.js +74 -0
- package/dist/cjs/utils/async.js.map +1 -0
- package/dist/cjs/utils/error.d.ts +1 -0
- package/dist/cjs/utils/error.js +10 -0
- package/dist/cjs/utils/error.js.map +1 -0
- package/dist/cjs/utils/format.d.ts +9 -0
- package/dist/cjs/utils/format.js +19 -0
- package/dist/cjs/utils/format.js.map +1 -0
- package/dist/cjs/utils/package.d.ts +15 -0
- package/dist/cjs/utils/package.js +25 -0
- package/dist/cjs/utils/package.js.map +1 -0
- package/dist/cjs/utils/types.d.ts +6 -0
- package/dist/cjs/utils/types.js +9 -0
- package/dist/cjs/utils/types.js.map +1 -0
- package/dist/cjs/utils/url.d.ts +9 -0
- package/dist/cjs/utils/url.js +32 -0
- package/dist/cjs/utils/url.js.map +1 -0
- package/dist/cjs/utils/valueMonitor.d.ts +31 -0
- package/dist/cjs/utils/valueMonitor.js +91 -0
- package/dist/cjs/utils/valueMonitor.js.map +1 -0
- package/package.json +85 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import type { BasicCrawlingContext, CheerioCrawlingContext, CrawlingContext, ErrorHandler, HttpCrawlingContext, JSDOMCrawlingContext, PlaywrightCrawlingContext, PuppeteerCrawlingContext } from 'crawlee';
|
|
2
|
+
import type { MaybePromise, PickRequired } from '../../utils/types';
|
|
3
|
+
import type { RouteHandler, RouterHandlerCtx } from '../router';
|
|
4
|
+
import type { CrawleeOneErrorHandlerInput, CrawleeOneErrorHandlerOptions } from '../integrations/types';
|
|
5
|
+
export type CaptureErrorInput = PickRequired<Partial<CrawleeOneErrorHandlerInput>, 'error'>;
|
|
6
|
+
export type CaptureError = (input: CaptureErrorInput) => MaybePromise<void>;
|
|
7
|
+
/**
|
|
8
|
+
* Error handling for CrawleeOne crawlers.
|
|
9
|
+
*
|
|
10
|
+
* By default, error reports are saved to Apify Dataset.
|
|
11
|
+
*
|
|
12
|
+
* See https://docs.apify.com/academy/node-js/analyzing-pages-and-fixing-errors#error-reporting
|
|
13
|
+
*/
|
|
14
|
+
export declare const captureError: <TEnv extends object = object, TReport extends object = object>(input: CaptureErrorInput, options: CrawleeOneErrorHandlerOptions<TEnv, TReport>) => Promise<never>;
|
|
15
|
+
/**
|
|
16
|
+
* Error handling for Crawlers as a function wrapper
|
|
17
|
+
*
|
|
18
|
+
* By default, error reports are saved to Apify Dataset.
|
|
19
|
+
*/
|
|
20
|
+
export declare const captureErrorWrapper: <TEnv extends object = object, TReport extends object = object>(fn: (input: {
|
|
21
|
+
captureError: CaptureError;
|
|
22
|
+
}) => MaybePromise<void>, options: CrawleeOneErrorHandlerOptions<TEnv, TReport>) => Promise<void>;
|
|
23
|
+
/**
|
|
24
|
+
* Drop-in replacement for regular request handler callback for Crawlee route
|
|
25
|
+
* that automatically tracks errors.
|
|
26
|
+
*
|
|
27
|
+
* By default, error reports are saved to Apify Dataset.
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
*
|
|
31
|
+
* router.addDefaultHandler(
|
|
32
|
+
* captureErrorRouteHandler(async (ctx) => {
|
|
33
|
+
* const { page, crawler } = ctx;
|
|
34
|
+
* const url = page.url();
|
|
35
|
+
* ...
|
|
36
|
+
* })
|
|
37
|
+
* );
|
|
38
|
+
*/
|
|
39
|
+
export declare const captureErrorRouteHandler: <Ctx extends CrawlingContext<unknown, import("crawlee").Dictionary>, TEnv extends object = object, TReport extends object = object>(handler: (ctx: Omit<Ctx, "request"> & {
|
|
40
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
41
|
+
} & {
|
|
42
|
+
captureError: CaptureError;
|
|
43
|
+
}) => MaybePromise<void>, options: CrawleeOneErrorHandlerOptions<TEnv, TReport>) => (ctx: Omit<Omit<Ctx & Record<string, any>, "request"> & {
|
|
44
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
45
|
+
}, "request"> & {
|
|
46
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
47
|
+
}) => Promise<void>;
|
|
48
|
+
export declare const basicCaptureErrorRouteHandler: <Ctx extends BasicCrawlingContext<import("crawlee").Dictionary>>(handler: (ctx: Omit<Ctx, "request"> & {
|
|
49
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
50
|
+
} & {
|
|
51
|
+
captureError: CaptureError;
|
|
52
|
+
}) => MaybePromise<void>, options: CrawleeOneErrorHandlerOptions<object, object>) => (ctx: Omit<Omit<Ctx & Record<string, any>, "request"> & {
|
|
53
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
54
|
+
}, "request"> & {
|
|
55
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
56
|
+
}) => Promise<void>;
|
|
57
|
+
export declare const httpCaptureErrorRouteHandler: <Ctx extends HttpCrawlingContext<any, any>>(handler: (ctx: Omit<Ctx, "request"> & {
|
|
58
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
59
|
+
} & {
|
|
60
|
+
captureError: CaptureError;
|
|
61
|
+
}) => MaybePromise<void>, options: CrawleeOneErrorHandlerOptions<object, object>) => (ctx: Omit<Omit<Ctx & Record<string, any>, "request"> & {
|
|
62
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
63
|
+
}, "request"> & {
|
|
64
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
65
|
+
}) => Promise<void>;
|
|
66
|
+
export declare const jsdomCaptureErrorRouteHandler: <Ctx extends JSDOMCrawlingContext<any, any>>(handler: (ctx: Omit<Ctx, "request"> & {
|
|
67
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
68
|
+
} & {
|
|
69
|
+
captureError: CaptureError;
|
|
70
|
+
}) => MaybePromise<void>, options: CrawleeOneErrorHandlerOptions<object, object>) => (ctx: Omit<Omit<Ctx & Record<string, any>, "request"> & {
|
|
71
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
72
|
+
}, "request"> & {
|
|
73
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
74
|
+
}) => Promise<void>;
|
|
75
|
+
export declare const playwrightCaptureErrorRouteHandler: <Ctx extends PlaywrightCrawlingContext<import("crawlee").Dictionary>>(handler: (ctx: Omit<Ctx, "request"> & {
|
|
76
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
77
|
+
} & {
|
|
78
|
+
captureError: CaptureError;
|
|
79
|
+
}) => MaybePromise<void>, options: CrawleeOneErrorHandlerOptions<object, object>) => (ctx: Omit<Omit<Ctx & Record<string, any>, "request"> & {
|
|
80
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
81
|
+
}, "request"> & {
|
|
82
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
83
|
+
}) => Promise<void>;
|
|
84
|
+
export declare const cheerioCaptureErrorRouteHandler: <Ctx extends CheerioCrawlingContext<any, any>>(handler: (ctx: Omit<Ctx, "request"> & {
|
|
85
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
86
|
+
} & {
|
|
87
|
+
captureError: CaptureError;
|
|
88
|
+
}) => MaybePromise<void>, options: CrawleeOneErrorHandlerOptions<object, object>) => (ctx: Omit<Omit<Ctx & Record<string, any>, "request"> & {
|
|
89
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
90
|
+
}, "request"> & {
|
|
91
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
92
|
+
}) => Promise<void>;
|
|
93
|
+
export declare const puppeteerCaptureErrorRouteHandler: <Ctx extends PuppeteerCrawlingContext<import("crawlee").Dictionary>>(handler: (ctx: Omit<Ctx, "request"> & {
|
|
94
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
95
|
+
} & {
|
|
96
|
+
captureError: CaptureError;
|
|
97
|
+
}) => MaybePromise<void>, options: CrawleeOneErrorHandlerOptions<object, object>) => (ctx: Omit<Omit<Ctx & Record<string, any>, "request"> & {
|
|
98
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
99
|
+
}, "request"> & {
|
|
100
|
+
request: import("crawlee").Request<import("crawlee").Dictionary>;
|
|
101
|
+
}) => Promise<void>;
|
|
102
|
+
/**
|
|
103
|
+
* Create an `ErrorHandler` function that can be assigned to
|
|
104
|
+
* `failedRequestHandler` option of `BasicCrawlerOptions`.
|
|
105
|
+
*
|
|
106
|
+
* The function saves error to a Dataset, and optionally forwards it to Sentry.
|
|
107
|
+
*
|
|
108
|
+
* By default, error reports are saved to Apify Dataset.
|
|
109
|
+
*/
|
|
110
|
+
export declare const createErrorHandler: <Ctx extends CrawlingContext<unknown, import("crawlee").Dictionary>>(options: CrawleeOneErrorHandlerOptions & {
|
|
111
|
+
sendToSentry?: boolean;
|
|
112
|
+
}) => ErrorHandler<Ctx>;
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
26
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
27
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
28
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
29
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
30
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
31
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
32
|
+
});
|
|
33
|
+
};
|
|
34
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
35
|
+
exports.createErrorHandler = exports.puppeteerCaptureErrorRouteHandler = exports.cheerioCaptureErrorRouteHandler = exports.playwrightCaptureErrorRouteHandler = exports.jsdomCaptureErrorRouteHandler = exports.httpCaptureErrorRouteHandler = exports.basicCaptureErrorRouteHandler = exports.captureErrorRouteHandler = exports.captureErrorWrapper = exports.captureError = void 0;
|
|
36
|
+
const Sentry = __importStar(require("@sentry/node"));
|
|
37
|
+
const apify_1 = require("../integrations/apify");
|
|
38
|
+
/**
|
|
39
|
+
* Error handling for CrawleeOne crawlers.
|
|
40
|
+
*
|
|
41
|
+
* By default, error reports are saved to Apify Dataset.
|
|
42
|
+
*
|
|
43
|
+
* See https://docs.apify.com/academy/node-js/analyzing-pages-and-fixing-errors#error-reporting
|
|
44
|
+
*/
|
|
45
|
+
const captureError = (input, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
46
|
+
var _a, _b, _c;
|
|
47
|
+
const { error, log: parentLog } = input;
|
|
48
|
+
const { io = apify_1.apifyIO, reportingDatasetId, onErrorCapture, } = options;
|
|
49
|
+
const log = (_a = parentLog === null || parentLog === void 0 ? void 0 : parentLog.child({ prefix: '[Error capture] ' })) !== null && _a !== void 0 ? _a : null;
|
|
50
|
+
log === null || log === void 0 ? void 0 : log.error(`ERROR ${error.name}: ${error.message}`, error);
|
|
51
|
+
console.error(`ERROR ${error.name}: ${error.message}`, error);
|
|
52
|
+
// Let's create reporting dataset
|
|
53
|
+
// If you already have one, this will continue adding to it
|
|
54
|
+
const reportingDataset = reportingDatasetId ? yield io.openDataset(reportingDatasetId) : null;
|
|
55
|
+
const report = yield io.generateErrorReport({ error, page: (_b = input.page) !== null && _b !== void 0 ? _b : null, url: (_c = input.url) !== null && _c !== void 0 ? _c : null, log }, Object.assign(Object.assign({}, options), { io }));
|
|
56
|
+
log === null || log === void 0 ? void 0 : log.error('[Error capture] Error captured', report);
|
|
57
|
+
// And we push the report
|
|
58
|
+
if (reportingDatasetId) {
|
|
59
|
+
log === null || log === void 0 ? void 0 : log.info(`[Error capture] Pushing error to dataset ${reportingDatasetId}`);
|
|
60
|
+
yield (reportingDataset === null || reportingDataset === void 0 ? void 0 : reportingDataset.pushData(report));
|
|
61
|
+
log === null || log === void 0 ? void 0 : log.info(`[Error capture] DONE pushing error to dataset ${reportingDatasetId}`);
|
|
62
|
+
}
|
|
63
|
+
log === null || log === void 0 ? void 0 : log.error('[Error capture] Calling onErrorCapture');
|
|
64
|
+
yield (onErrorCapture === null || onErrorCapture === void 0 ? void 0 : onErrorCapture({ error, report }));
|
|
65
|
+
log === null || log === void 0 ? void 0 : log.error('[Error capture] Done calling onErrorCapture');
|
|
66
|
+
// @ts-expect-error Tag the error, so we don't capture it twice.
|
|
67
|
+
error._crawleeOneErrorCaptured = true;
|
|
68
|
+
// Propagate the error
|
|
69
|
+
throw error;
|
|
70
|
+
});
|
|
71
|
+
exports.captureError = captureError;
|
|
72
|
+
/**
|
|
73
|
+
* Error handling for Crawlers as a function wrapper
|
|
74
|
+
*
|
|
75
|
+
* By default, error reports are saved to Apify Dataset.
|
|
76
|
+
*/
|
|
77
|
+
const captureErrorWrapper = (fn, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
78
|
+
const captureErrorWithArgs = (input) => (0, exports.captureError)(input, options);
|
|
79
|
+
try {
|
|
80
|
+
// Pass the error capturing function to the wrapped function, so it can trigger it by itself
|
|
81
|
+
yield fn({ captureError: captureErrorWithArgs });
|
|
82
|
+
}
|
|
83
|
+
catch (error) {
|
|
84
|
+
if (!error._crawleeOneErrorCaptured) {
|
|
85
|
+
// And if the wrapped function fails, we capture error for them
|
|
86
|
+
yield captureErrorWithArgs({ error, url: null, page: null, log: null });
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
});
|
|
90
|
+
exports.captureErrorWrapper = captureErrorWrapper;
|
|
91
|
+
/**
|
|
92
|
+
* Drop-in replacement for regular request handler callback for Crawlee route
|
|
93
|
+
* that automatically tracks errors.
|
|
94
|
+
*
|
|
95
|
+
* By default, error reports are saved to Apify Dataset.
|
|
96
|
+
*
|
|
97
|
+
* @example
|
|
98
|
+
*
|
|
99
|
+
* router.addDefaultHandler(
|
|
100
|
+
* captureErrorRouteHandler(async (ctx) => {
|
|
101
|
+
* const { page, crawler } = ctx;
|
|
102
|
+
* const url = page.url();
|
|
103
|
+
* ...
|
|
104
|
+
* })
|
|
105
|
+
* );
|
|
106
|
+
*/
|
|
107
|
+
const captureErrorRouteHandler = (handler, options) => {
|
|
108
|
+
// Wrap the original handler, so we can additionally pass it the captureError function
|
|
109
|
+
const wrapperHandler = (ctx) => {
|
|
110
|
+
return (0, exports.captureErrorWrapper)(({ captureError }) => {
|
|
111
|
+
return handler(Object.assign(Object.assign({}, ctx), {
|
|
112
|
+
// And automatically feed contextual args (page, url, log) to captureError
|
|
113
|
+
captureError: (input) => {
|
|
114
|
+
var _a, _b;
|
|
115
|
+
return captureError({
|
|
116
|
+
error: input.error,
|
|
117
|
+
page: (_a = input.page) !== null && _a !== void 0 ? _a : ctx.page,
|
|
118
|
+
url: input.url || ctx.request.url,
|
|
119
|
+
log: (_b = input.log) !== null && _b !== void 0 ? _b : ctx.log,
|
|
120
|
+
});
|
|
121
|
+
} }));
|
|
122
|
+
}, options);
|
|
123
|
+
};
|
|
124
|
+
return wrapperHandler;
|
|
125
|
+
};
|
|
126
|
+
exports.captureErrorRouteHandler = captureErrorRouteHandler;
|
|
127
|
+
// Crawler-specific aliases of `captureErrorRouteHandler`.
// At runtime each one only forwards its arguments unchanged; they exist so that
// every crawler flavour has its own named export.

/** Alias of `captureErrorRouteHandler` exported for the basic crawler. */
const basicCaptureErrorRouteHandler = (...args) => {
    return (0, exports.captureErrorRouteHandler)(...args);
};
exports.basicCaptureErrorRouteHandler = basicCaptureErrorRouteHandler;

/** Alias of `captureErrorRouteHandler` exported for the HTTP crawler. */
const httpCaptureErrorRouteHandler = (...args) => {
    return (0, exports.captureErrorRouteHandler)(...args);
};
exports.httpCaptureErrorRouteHandler = httpCaptureErrorRouteHandler;

/** Alias of `captureErrorRouteHandler` exported for the JSDOM crawler. */
const jsdomCaptureErrorRouteHandler = (...args) => {
    return (0, exports.captureErrorRouteHandler)(...args);
};
exports.jsdomCaptureErrorRouteHandler = jsdomCaptureErrorRouteHandler;

/** Alias of `captureErrorRouteHandler` exported for the Playwright crawler. */
const playwrightCaptureErrorRouteHandler = (...args) => {
    return (0, exports.captureErrorRouteHandler)(...args);
};
exports.playwrightCaptureErrorRouteHandler = playwrightCaptureErrorRouteHandler;

/** Alias of `captureErrorRouteHandler` exported for the Cheerio crawler. */
const cheerioCaptureErrorRouteHandler = (...args) => {
    return (0, exports.captureErrorRouteHandler)(...args);
};
exports.cheerioCaptureErrorRouteHandler = cheerioCaptureErrorRouteHandler;

/** Alias of `captureErrorRouteHandler` exported for the Puppeteer crawler. */
const puppeteerCaptureErrorRouteHandler = (...args) => {
    return (0, exports.captureErrorRouteHandler)(...args);
};
exports.puppeteerCaptureErrorRouteHandler = puppeteerCaptureErrorRouteHandler;
|
|
139
|
+
/**
|
|
140
|
+
* Create an `ErrorHandler` function that can be assigned to
|
|
141
|
+
* `failedRequestHandler` option of `BasicCrawlerOptions`.
|
|
142
|
+
*
|
|
143
|
+
* The function saves error to a Dataset, and optionally forwards it to Sentry.
|
|
144
|
+
*
|
|
145
|
+
* By default, error reports are saved to Apify Dataset.
|
|
146
|
+
*/
|
|
147
|
+
const createErrorHandler = (options) => {
|
|
148
|
+
return ({ request, log, page }, error) => __awaiter(void 0, void 0, void 0, function* () {
|
|
149
|
+
var _a;
|
|
150
|
+
const url = request.loadedUrl || request.url;
|
|
151
|
+
(0, exports.captureError)({ error, url, log, page: page }, {
|
|
152
|
+
io: options.io,
|
|
153
|
+
reportingDatasetId: options.reportingDatasetId,
|
|
154
|
+
allowScreenshot: (_a = options.allowScreenshot) !== null && _a !== void 0 ? _a : true,
|
|
155
|
+
onErrorCapture: ({ error, report }) => {
|
|
156
|
+
if (!options.sendToSentry)
|
|
157
|
+
return;
|
|
158
|
+
Sentry.captureException(error, { extra: report });
|
|
159
|
+
},
|
|
160
|
+
});
|
|
161
|
+
});
|
|
162
|
+
};
|
|
163
|
+
exports.createErrorHandler = createErrorHandler;
|
|
164
|
+
//# sourceMappingURL=errorHandler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errorHandler.js","sourceRoot":"","sources":["../../../../src/lib/error/errorHandler.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAUA,qDAAuC;AAUvC,iDAAgD;AAKhD;;;;;;GAMG;AACI,MAAM,YAAY,GAAG,CAC1B,KAAwB,EACxB,OAAqD,EACrD,EAAE;;IACF,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,EAAE,GAAG,KAAK,CAAC;IACxC,MAAM,EACJ,EAAE,GAAG,eAA6C,EAClD,kBAAkB,EAClB,cAAc,GACf,GAAG,OAAO,CAAC;IAEZ,MAAM,GAAG,GAAG,MAAA,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,KAAK,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC,mCAAI,IAAI,CAAC;IAErE,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,SAAS,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,CAAC,CAAC;IAC3D,OAAO,CAAC,KAAK,CAAC,SAAS,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,CAAC,CAAC;IAE9D,iCAAiC;IACjC,2DAA2D;IAC3D,MAAM,gBAAgB,GAAG,kBAAkB,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,WAAW,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9F,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,mBAAmB,CACzC,EAAE,KAAK,EAAE,IAAI,EAAE,MAAA,KAAK,CAAC,IAAI,mCAAI,IAAI,EAAE,GAAG,EAAE,MAAA,KAAK,CAAC,GAAG,mCAAI,IAAI,EAAE,GAAG,EAAE,kCAC3D,OAAO,KAAE,EAAE,IACjB,CAAC;IAEF,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;IAErD,yBAAyB;IACzB,IAAI,kBAAkB,EAAE;QACtB,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,4CAA4C,kBAAkB,EAAE,CAAC,CAAC;QAC5E,MAAM,CAAA,gBAAgB,aAAhB,gBAAgB,uBAAhB,gBAAgB,CAAE,QAAQ,CAAC,MAAM,CAAC,CAAA,CAAC;QACzC,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,iDAAiD,kBAAkB,EAAE,CAAC,CAAC;KAClF;IAED,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,wCAAwC,CAAC,CAAC;IACrD,MAAM,CAAA,cAAc,aAAd,cAAc,uBAAd,cAAc,CAAG,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAA,CAAC;IAC1C,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,6CAA6C,CAAC,CAAC;IAE1D,gEAAgE;IAChE,KAAK,CAAC,wBAAwB,GAAG,IAAI,CAAC;IACtC,sBAAsB;IACtB,MAAM,KAAK,CAAC;AACd,CAAC,CAAA,CAAC;AAzCW,QAAA,YAAY,gBAyCvB;AAEF;;;;GAIG;AACI,MAAM,mBAAmB,GAAG,CAIjC,EAAiE,EACjE,OAAqD,EACrD,EAAE;IACF,MAAM,oBAAoB,GAAiB,CAAC,KAAK,EAAE,EAAE,CAAC,IAAA,oBAAY,EAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAEnF,IAAI;QACF,4FAA4F;QAC5F,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,oBAAoB,EAAE,CAAC,CAAC;KAClD;IAAC,OAAO
,KAAU,EAAE;QACnB,IAAI,CAAC,KAAK,CAAC,wBAAwB,EAAE;YACnC,+DAA+D;YAC/D,MAAM,oBAAoB,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;SACzE;KACF;AACH,CAAC,CAAA,CAAC;AAlBW,QAAA,mBAAmB,uBAkB9B;AAEF;;;;;;;;;;;;;;;GAeG;AACI,MAAM,wBAAwB,GAAG,CAKtC,OAA4F,EAC5F,OAAqD,EACrD,EAAE;IACF,sFAAsF;IACtF,MAAM,cAAc,GAAG,CAAC,GAAqC,EAAE,EAAE;QAC/D,OAAO,IAAA,2BAAmB,EAAC,CAAC,EAAE,YAAY,EAAE,EAAE,EAAE;YAC9C,OAAO,OAAO,iCACR,GAAW;gBACf,0EAA0E;gBAC1E,YAAY,EAAE,CAAC,KAAK,EAAE,EAAE;;oBACtB,OAAA,YAAY,CAAC;wBACX,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,IAAI,EAAE,MAAA,KAAK,CAAC,IAAI,mCAAI,GAAG,CAAC,IAAI;wBAC5B,GAAG,EAAE,KAAK,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG;wBACjC,GAAG,EAAE,MAAA,KAAK,CAAC,GAAG,mCAAI,GAAG,CAAC,GAAG;qBAC1B,CAAC,CAAA;iBAAA,IACJ,CAAC;QACL,CAAC,EAAE,OAAO,CAAC,CAAC;IACd,CAAC,CAAC;IACF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAzBW,QAAA,wBAAwB,4BAyBnC;AAEK,MAAM,6BAA6B,GAAG,CAAmC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA3L,QAAA,6BAA6B,iCAA2I;AAC9K,MAAM,4BAA4B,GAAG,CAAkC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAAzL,QAAA,4BAA4B,gCAA0I;AAC5K,MAAM,6BAA6B,GAAG,CAAmC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA3L,QAAA,6BAA6B,iCAA2I;AAC9K,MAAM,kCAAkC,GAAG,CAAwC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAArM,QAAA,kCAAkC,sCAAgJ;AACxL,MAAM,+BAA+B,GAAG,CAAqC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA/L,QAAA,+BAA+B,mCAA6I;AAClL,MAAM,iCAAiC,GAAG,CAAuC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAAnM,QAAA,iCAAiC,qCAA+I;AAE7L;;;;;;;GAOG;AACI,MAAM,kBAAkB,GAAG,CAChC,OAAmE,EAChD,EAAE;IACrB,OAAO,CAAO,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE;;QAC7C,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC;QAC7C,IAAA,oBAAY,EACV,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,IAAY,EAAE,EACvC;YACE,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,kBAAkB,EAAE,OAAO,CAAC,kBAAkB;YAC9C,eAAe,EAAE,
MAAA,OAAO,CAAC,eAAe,mCAAI,IAAI;YAChD,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE;gBACpC,IAAI,CAAC,OAAO,CAAC,YAAY;oBAAE,OAAO;gBAElC,MAAM,CAAC,gBAAgB,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,MAAa,EAAE,CAAC,CAAC;YAC3D,CAAC;SACF,CACF,CAAC;IACJ,CAAC,CAAA,CAAC;AACJ,CAAC,CAAC;AAnBW,QAAA,kBAAkB,sBAmB7B","sourcesContent":["import type {\n BasicCrawlingContext,\n CheerioCrawlingContext,\n CrawlingContext,\n ErrorHandler,\n HttpCrawlingContext,\n JSDOMCrawlingContext,\n PlaywrightCrawlingContext,\n PuppeteerCrawlingContext,\n} from 'crawlee';\nimport * as Sentry from '@sentry/node';\nimport type { Page } from 'playwright';\n\nimport type { MaybePromise, PickRequired } from '../../utils/types';\nimport type { RouteHandler, RouterHandlerCtx } from '../router';\nimport type {\n CrawleeOneErrorHandlerInput,\n CrawleeOneErrorHandlerOptions,\n CrawleeOneIO,\n} from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\n\nexport type CaptureErrorInput = PickRequired<Partial<CrawleeOneErrorHandlerInput>, 'error'>;\nexport type CaptureError = (input: CaptureErrorInput) => MaybePromise<void>;\n\n/**\n * Error handling for CrawleeOne crawlers.\n *\n * By default, error reports are saved to Apify Dataset.\n *\n * See https://docs.apify.com/academy/node-js/analyzing-pages-and-fixing-errors#error-reporting\n */\nexport const captureError = async <TEnv extends object = object, TReport extends object = object>(\n input: CaptureErrorInput,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n const { error, log: parentLog } = input;\n const {\n io = apifyIO as any as CrawleeOneIO<TEnv, TReport>,\n reportingDatasetId,\n onErrorCapture,\n } = options;\n\n const log = parentLog?.child({ prefix: '[Error capture] ' }) ?? 
null;\n\n log?.error(`ERROR ${error.name}: ${error.message}`, error);\n console.error(`ERROR ${error.name}: ${error.message}`, error);\n\n // Let's create reporting dataset\n // If you already have one, this will continue adding to it\n const reportingDataset = reportingDatasetId ? await io.openDataset(reportingDatasetId) : null;\n const report = await io.generateErrorReport(\n { error, page: input.page ?? null, url: input.url ?? null, log },\n { ...options, io }\n );\n\n log?.error('[Error capture] Error captured', report);\n\n // And we push the report\n if (reportingDatasetId) {\n log?.info(`[Error capture] Pushing error to dataset ${reportingDatasetId}`);\n await reportingDataset?.pushData(report);\n log?.info(`[Error capture] DONE pushing error to dataset ${reportingDatasetId}`);\n }\n\n log?.error('[Error capture] Calling onErrorCapture');\n await onErrorCapture?.({ error, report });\n log?.error('[Error capture] Done calling onErrorCapture');\n\n // @ts-expect-error Tag the error, so we don't capture it twice.\n error._crawleeOneErrorCaptured = true;\n // Propagate the error\n throw error;\n};\n\n/**\n * Error handling for Crawlers as a function wrapper\n *\n * By default, error reports are saved to Apify Dataset.\n */\nexport const captureErrorWrapper = async <\n TEnv extends object = object,\n TReport extends object = object\n>(\n fn: (input: { captureError: CaptureError }) => MaybePromise<void>,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n const captureErrorWithArgs: CaptureError = (input) => captureError(input, options);\n\n try {\n // Pass the error capturing function to the wrapped function, so it can trigger it by itself\n await fn({ captureError: captureErrorWithArgs });\n } catch (error: any) {\n if (!error._crawleeOneErrorCaptured) {\n // And if the wrapped function fails, we capture error for them\n await captureErrorWithArgs({ error, url: null, page: null, log: null });\n }\n }\n};\n\n/**\n * Drop-in replacement for regular 
request handler callback for Crawlee route\n * that automatically tracks errors.\n *\n * By default, error reports are saved to Apify Dataset.\n *\n * @example\n *\n * router.addDefaultHandler(\n * captureErrorRouteHandler(async (ctx) => {\n * const { page, crawler } = ctx;\n * const url = page.url();\n * ...\n * })\n * );\n */\nexport const captureErrorRouteHandler = <\n Ctx extends CrawlingContext,\n TEnv extends object = object,\n TReport extends object = object\n>(\n handler: (ctx: RouterHandlerCtx<Ctx> & { captureError: CaptureError }) => MaybePromise<void>,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n // Wrap the original handler, so we can additionally pass it the captureError function\n const wrapperHandler = (ctx: Parameters<RouteHandler<Ctx>>[0]) => {\n return captureErrorWrapper(({ captureError }) => {\n return handler({\n ...(ctx as any),\n // And automatically feed contextual args (page, url, log) to captureError\n captureError: (input) =>\n captureError({\n error: input.error,\n page: input.page ?? ctx.page,\n url: input.url || ctx.request.url,\n log: input.log ?? 
ctx.log,\n }),\n });\n }, options);\n };\n return wrapperHandler;\n};\n\nexport const basicCaptureErrorRouteHandler = <Ctx extends BasicCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const httpCaptureErrorRouteHandler = <Ctx extends HttpCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const jsdomCaptureErrorRouteHandler = <Ctx extends JSDOMCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const playwrightCaptureErrorRouteHandler = <Ctx extends PlaywrightCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const cheerioCaptureErrorRouteHandler = <Ctx extends CheerioCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const puppeteerCaptureErrorRouteHandler = <Ctx extends PuppeteerCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\n\n/**\n * Create an `ErrorHandler` function that can be assigned to\n * `failedRequestHandler` option of `BasicCrawlerOptions`.\n *\n * The function saves error to a Dataset, and optionally forwards it to Sentry.\n *\n * By default, error reports are saved to Apify Dataset.\n */\nexport const createErrorHandler = <Ctx extends CrawlingContext>(\n options: CrawleeOneErrorHandlerOptions & { sendToSentry?: boolean }\n): ErrorHandler<Ctx> => {\n return async ({ request, log, page }, error) => {\n const url = request.loadedUrl || request.url;\n captureError(\n { error, url, log, page: page as Page },\n {\n io: options.io,\n reportingDatasetId: options.reportingDatasetId,\n allowScreenshot: 
options.allowScreenshot ?? true,\n onErrorCapture: ({ error, report }) => {\n if (!options.sendToSentry) return;\n\n Sentry.captureException(error, { extra: report as any });\n },\n }\n );\n };\n};\n"]}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import * as Sentry from '@sentry/node';
|
|
2
|
+
import type { CrawleeOneIO } from '../integrations/types';
|
|
3
|
+
/**
|
|
4
|
+
* Sentry configuration common to all crawlers.
|
|
5
|
+
*
|
|
6
|
+
* By default, sentry is enabled only on the server.
|
|
7
|
+
* In Apify, this is when `process.env.APIFY_IS_AT_HOME` is true.
|
|
8
|
+
*/
|
|
9
|
+
export declare const setupSentry: (sentryOptions?: Sentry.NodeOptions, options?: {
|
|
10
|
+
io?: CrawleeOneIO;
|
|
11
|
+
}) => Promise<void>;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
26
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
27
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
28
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
29
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
30
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
31
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
32
|
+
});
|
|
33
|
+
};
|
|
34
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
35
|
+
exports.setupSentry = void 0;
|
|
36
|
+
const Sentry = __importStar(require("@sentry/node"));
|
|
37
|
+
const apify_1 = require("../integrations/apify");
|
|
38
|
+
/**
 * Sentry configuration common to all crawlers.
 *
 * Sentry is initialised only when it is enabled:
 * - an explicit `sentryOptions.enabled` (anything other than null/undefined) wins;
 * - otherwise the decision is delegated to `io.isTelemetryEnabled()`, where `io`
 *   defaults to the Apify integration. In Apify, this is when
 *   `process.env.APIFY_IS_AT_HOME` is set.
 *
 * @param sentryOptions Options forwarded to `Sentry.init`; they override the
 *   built-in `dsn` and `tracesSampleRate` defaults.
 * @param options Optional `{ io }` to swap the integration that answers the
 *   telemetry question.
 * @throws Error when Sentry is enabled but `sentryOptions.serverName` is missing.
 */
const setupSentry = (sentryOptions, options) => __awaiter(void 0, void 0, void 0, function* () {
    const hasOptions = options !== null && options !== undefined;
    const integration = hasOptions && options.io !== undefined ? options.io : apify_1.apifyIO;
    const hasSentryOptions = sentryOptions !== null && sentryOptions !== undefined;
    // Ask the integration only when the caller did not decide explicitly.
    let shouldInit;
    if (hasSentryOptions && sentryOptions.enabled != null) {
        shouldInit = sentryOptions.enabled;
    }
    else {
        shouldInit = yield integration.isTelemetryEnabled();
    }
    if (!shouldInit) {
        return;
    }
    // The server name identifies the crawler in the Sentry UI, so it is mandatory.
    const serverName = hasSentryOptions ? sentryOptions.serverName : undefined;
    if (!serverName) {
        throw Error('Sentry setup is missing "serverName" property.'); // prettier-ignore
    }
    // Defaults first; anything in `sentryOptions` takes precedence.
    const initOptions = {
        dsn: 'https://5b2e0562b4ec4ef6805a3fbbf4ff8acd@o470159.ingest.sentry.io/4505019830370304',
        // 1.0 captures 100% of transactions for performance monitoring.
        tracesSampleRate: 1.0,
    };
    Sentry.init(Object.assign(initOptions, sentryOptions));
});
|
|
59
|
+
exports.setupSentry = setupSentry;
|
|
60
|
+
//# sourceMappingURL=sentry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sentry.js","sourceRoot":"","sources":["../../../../src/lib/error/sentry.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,qDAAuC;AAGvC,iDAAgD;AAEhD;;;;;GAKG;AACI,MAAM,WAAW,GAAG,CACzB,aAAkC,EAClC,OAA+B,EAC/B,EAAE;IACF,MAAM,EAAE,EAAE,GAAG,eAAO,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAEvC,iDAAiD;IACjD,MAAM,OAAO,GACX,CAAA,aAAa,aAAb,aAAa,uBAAb,aAAa,CAAE,OAAO,KAAI,IAAI,CAAC,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,kBAAkB,EAAE,CAAC;IAEzF,IAAI,CAAC,OAAO;QAAE,OAAO;IAErB,gEAAgE;IAChE,IAAI,CAAC,CAAA,aAAa,aAAb,aAAa,uBAAb,aAAa,CAAE,UAAU,CAAA;QAAE,MAAM,KAAK,CAAC,gDAAgD,CAAC,CAAC,CAAC,kBAAkB;IAEjH,MAAM,CAAC,IAAI,iBACT,GAAG,EAAE,oFAAoF;QAEzF,8CAA8C;QAC9C,8CAA8C;QAC9C,kDAAkD;QAClD,gBAAgB,EAAE,GAAG,IAElB,aAAa,EAChB,CAAC;AACL,CAAC,CAAA,CAAC;AAzBW,QAAA,WAAW,eAyBtB","sourcesContent":["import * as Sentry from '@sentry/node';\n\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\n\n/**\n * Sentry configuration common to all crawlers.\n *\n * By default, sentry is enabled only on the server.\n * In Apify, whis is when `process.env.APIFY_IS_AT_HOME` is true.\n */\nexport const setupSentry = async (\n sentryOptions?: Sentry.NodeOptions,\n options?: { io?: CrawleeOneIO }\n) => {\n const { io = apifyIO } = options ?? {};\n\n // As default, enable sentry only on Apify server\n const enabled =\n sentryOptions?.enabled != null ? 
sentryOptions.enabled : await io.isTelemetryEnabled();\n\n if (!enabled) return;\n\n // We use this field for identification in UI, so it's required.\n if (!sentryOptions?.serverName) throw Error('Sentry setup is missing \"serverName\" property.'); // prettier-ignore\n\n Sentry.init({\n dsn: 'https://5b2e0562b4ec4ef6805a3fbbf4ff8acd@o470159.ingest.sentry.io/4505019830370304',\n\n // Set tracesSampleRate to 1.0 to capture 100%\n // of transactions for performance monitoring.\n // We recommend adjusting this value in production\n tracesSampleRate: 1.0,\n\n ...sentryOptions,\n });\n};\n"]}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { ApifyEnv } from 'apify';
|
|
2
|
+
import { CrawlingContext } from 'crawlee';
|
|
3
|
+
import type { CrawleeOneIO } from './types';
|
|
4
|
+
export interface ApifyErrorReport {
|
|
5
|
+
actorId: string | null;
|
|
6
|
+
actorRunId: string | null;
|
|
7
|
+
actorRunUrl: string;
|
|
8
|
+
errorName: string;
|
|
9
|
+
errorMessage: string;
|
|
10
|
+
pageUrl: string | null;
|
|
11
|
+
pageHtmlSnapshot: string | null;
|
|
12
|
+
pageScreenshot: string | null;
|
|
13
|
+
}
|
|
14
|
+
export interface ApifyEntryMetadata {
|
|
15
|
+
actorId: string | null;
|
|
16
|
+
actorRunId: string | null;
|
|
17
|
+
actorRunUrl: string | null;
|
|
18
|
+
contextId: string;
|
|
19
|
+
requestId: string | null;
|
|
20
|
+
/** The URL given to the crawler */
|
|
21
|
+
originalUrl: string | null;
|
|
22
|
+
/** The URL that was actually loaded by the crawler, after possible redirects */
|
|
23
|
+
loadedUrl: string | null;
|
|
24
|
+
/** ISO datetime string that indicates the time when the request has been processed. */
|
|
25
|
+
dateHandled: string;
|
|
26
|
+
numberOfRetries: number;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Integration between CrawleeOne and Apify.
|
|
30
|
+
*
|
|
31
|
+
* This is the default integration.
|
|
32
|
+
*/
|
|
33
|
+
export type ApifyCrawleeOneIO = CrawleeOneIO<ApifyEnv, ApifyErrorReport, ApifyEntryMetadata>;
|
|
34
|
+
/**
|
|
35
|
+
* Integration between CrawleeOne and Apify.
|
|
36
|
+
*
|
|
37
|
+
* This is the default integration.
|
|
38
|
+
*/
|
|
39
|
+
export declare const apifyIO: {
|
|
40
|
+
openDataset: (id?: string | null | undefined) => Promise<{
|
|
41
|
+
pushData: (data: import("crawlee").Dictionary | import("crawlee").Dictionary[]) => Promise<void>;
|
|
42
|
+
getItems: (options?: Pick<import("apify").DatasetDataOptions, "offset" | "desc" | "limit" | "fields"> | undefined) => import("../../utils/types").MaybePromise<object[]>;
|
|
43
|
+
getItemCount: () => Promise<number | null>;
|
|
44
|
+
}>;
|
|
45
|
+
openRequestQueue: (id?: string | null | undefined) => Promise<import("apify").RequestQueue>;
|
|
46
|
+
openKeyValueStore: (id?: string | null | undefined) => Promise<import("apify").KeyValueStore>;
|
|
47
|
+
getEnv: () => ApifyEnv;
|
|
48
|
+
getInput: <Input extends object>() => Promise<Input | null>;
|
|
49
|
+
runInContext: (userFunc: () => unknown, options?: import("apify").ExitOptions | undefined) => Promise<void>;
|
|
50
|
+
triggerDownstreamCrawler: <TInput extends object>(targetActorId: string, input?: TInput | undefined, options?: {
|
|
51
|
+
build?: string | undefined;
|
|
52
|
+
} | undefined) => Promise<void>;
|
|
53
|
+
createDefaultProxyConfiguration: (input: any) => Promise<import("apify").ProxyConfiguration | undefined>;
|
|
54
|
+
isTelemetryEnabled: () => boolean;
|
|
55
|
+
generateErrorReport: (input: import("./types").CrawleeOneErrorHandlerInput, options: import("../../utils/types").PickRequired<import("./types").CrawleeOneErrorHandlerOptions<ApifyEnv, ApifyErrorReport>, "io">) => import("../../utils/types").MaybePromise<ApifyErrorReport>;
|
|
56
|
+
generateEntryMetadata: <Ctx extends CrawlingContext<unknown, import("crawlee").Dictionary>>(ctx: Ctx) => {
|
|
57
|
+
actorId: string | null;
|
|
58
|
+
actorRunId: string | null;
|
|
59
|
+
actorRunUrl: string | null;
|
|
60
|
+
contextId: string;
|
|
61
|
+
requestId: string | null;
|
|
62
|
+
originalUrl: string;
|
|
63
|
+
loadedUrl: string | null;
|
|
64
|
+
dateHandled: string;
|
|
65
|
+
numberOfRetries: number;
|
|
66
|
+
};
|
|
67
|
+
};
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.apifyIO = void 0;
|
|
13
|
+
const apify_1 = require("apify");
|
|
14
|
+
const crawlee_1 = require("crawlee");
|
|
15
|
+
/**
 * Build the error report stored by the Apify integration for a failed request.
 *
 * @param input `{ error, page, url, log }`. `page`, `url` and `log` may be absent.
 * @param options `{ io, allowScreenshot }`. `io.getEnv()` supplies the actor ID,
 *   run ID and default key-value store ID used in the report links.
 * @returns Promise of a report with `actorId`, `actorRunId`, `actorRunUrl`,
 *   `errorName`, `errorMessage`, `pageUrl`, `pageHtmlSnapshot` and `pageScreenshot`.
 *   The two snapshot fields are `null` unless a snapshot was actually saved.
 */
const generateApifyErrorReport = (input, options) => __awaiter(void 0, void 0, void 0, function* () {
    const { error, page, url, log } = input;
    const { io, allowScreenshot } = options;
    // storeId is ID of current key-value store, where we save snapshots.
    // Actor and run IDs are captured for easy access in the reporting dataset.
    const { actorId, actorRunId, defaultKeyValueStoreId: storeId } = yield io.getEnv();
    const actorRunUrl = `https://console.apify.com/actors/${actorId}/runs/${actorRunId}`;
    const randomNumber = Math.random();
    const key = `ERROR-${randomNumber}`;
    let pageScreenshot = null;
    let pageHtmlSnapshot = null;
    let pageUrl = url !== null && url !== void 0 ? url : null;
    // Fall back to the page's own URL whenever a page is available, regardless of
    // whether screenshots are allowed.
    if (page) {
        pageUrl = pageUrl || page.url();
    }
    if (page && allowScreenshot) {
        // Snapshotting is best-effort: a failure here (e.g. the page is already
        // closed) must not prevent the original error from being reported.
        try {
            if (log) {
                log.info('Capturing page snapshot');
            }
            yield crawlee_1.playwrightUtils.saveSnapshot(page, { key });
            if (log) {
                log.info('DONE capturing page snapshot');
            }
            // You will have to adjust the keys if you save them in a non-standard way
            pageScreenshot = `https://api.apify.com/v2/key-value-stores/${storeId}/records/${key}.jpg?disableRedirect=true`;
            pageHtmlSnapshot = `https://api.apify.com/v2/key-value-stores/${storeId}/records/${key}.html?disableRedirect=true`;
        }
        catch (snapshotError) {
            if (log) {
                log.error(`Failed to capture page snapshot: ${snapshotError}`);
            }
        }
    }
    // We create a report object
    const report = {
        actorId,
        actorRunId,
        actorRunUrl,
        errorName: error.name,
        errorMessage: error.toString(),
        pageUrl,
        pageHtmlSnapshot,
        pageScreenshot,
    };
    return report;
});
|
|
50
|
+
/**
 * Collect per-entry metadata for a crawling context.
 *
 * Actor and run IDs are read from `Actor.getEnv()`; the run URL is built only
 * when both IDs are present, otherwise it is `null`. Request fields that are
 * null/undefined are normalised to `null`. `dateHandled` uses the request's
 * `handledAt` when truthy and the current time (ISO string) otherwise.
 *
 * @param ctx Crawling context; `ctx.id` and `ctx.request` are read.
 * @returns Plain metadata object.
 */
const generateApifyEntryMetadata = (ctx) => {
    const env = apify_1.Actor.getEnv();
    const actorId = env.actorId;
    const actorRunId = env.actorRunId;
    let actorRunUrl = null;
    if (actorId != null && actorRunId != null) {
        actorRunUrl = `https://console.apify.com/actors/${actorId}/runs/${actorRunId}`;
    }
    // Fallback timestamp, used when the request carries no `handledAt`.
    const now = new Date().toISOString();
    const orNull = (value) => (value !== null && value !== undefined ? value : null);
    return {
        actorId,
        actorRunId,
        actorRunUrl,
        contextId: ctx.id,
        requestId: orNull(ctx.request.id),
        originalUrl: orNull(ctx.request.url),
        loadedUrl: orNull(ctx.request.loadedUrl),
        dateHandled: ctx.request.handledAt || now,
        numberOfRetries: ctx.request.retryCount,
    };
};
|
|
70
|
+
/**
|
|
71
|
+
* Integration between CrawleeOne and Apify.
|
|
72
|
+
*
|
|
73
|
+
* This is the default integration.
|
|
74
|
+
*/
|
|
75
|
+
exports.apifyIO = {
|
|
76
|
+
openDataset: (...args) => __awaiter(void 0, void 0, void 0, function* () {
|
|
77
|
+
const dataset = yield apify_1.Actor.openDataset(...args);
|
|
78
|
+
const getItemCount = () => __awaiter(void 0, void 0, void 0, function* () { var _a, _b; return (_b = (_a = (yield dataset.getInfo())) === null || _a === void 0 ? void 0 : _a.itemCount) !== null && _b !== void 0 ? _b : null; });
|
|
79
|
+
const getItems = (options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
80
|
+
const result = yield dataset.getData(Object.assign(Object.assign({}, options), { skipEmpty: true }));
|
|
81
|
+
return result.items;
|
|
82
|
+
});
|
|
83
|
+
return {
|
|
84
|
+
pushData: dataset.pushData.bind(dataset),
|
|
85
|
+
getItems,
|
|
86
|
+
getItemCount,
|
|
87
|
+
};
|
|
88
|
+
}),
|
|
89
|
+
openRequestQueue: (...args) => apify_1.Actor.openRequestQueue(...args),
|
|
90
|
+
openKeyValueStore: (...args) => apify_1.Actor.openKeyValueStore(...args),
|
|
91
|
+
getEnv: (...args) => apify_1.Actor.getEnv(...args),
|
|
92
|
+
getInput: (...args) => apify_1.Actor.getInput(...args),
|
|
93
|
+
runInContext: (...args) => __awaiter(void 0, void 0, void 0, function* () {
|
|
94
|
+
yield apify_1.Actor.main(...args);
|
|
95
|
+
}),
|
|
96
|
+
triggerDownstreamCrawler: (...args) => apify_1.Actor.metamorph(...args),
|
|
97
|
+
createDefaultProxyConfiguration: (input) => __awaiter(void 0, void 0, void 0, function* () {
|
|
98
|
+
return process.env.APIFY_IS_AT_HOME
|
|
99
|
+
? yield apify_1.Actor.createProxyConfiguration(input === null || input === void 0 ? void 0 : input.proxy)
|
|
100
|
+
: undefined;
|
|
101
|
+
}),
|
|
102
|
+
isTelemetryEnabled: () => !!process.env.APIFY_IS_AT_HOME,
|
|
103
|
+
generateErrorReport: generateApifyErrorReport,
|
|
104
|
+
generateEntryMetadata: generateApifyEntryMetadata,
|
|
105
|
+
};
|
|
106
|
+
//# sourceMappingURL=apify.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"apify.js","sourceRoot":"","sources":["../../../../src/lib/integrations/apify.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,iCAAwC;AACxC,qCAA2D;AAuC3D,MAAM,wBAAwB,GAA6C,CACzE,KAAK,EACL,OAAO,EACP,EAAE;IACF,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;IACxC,MAAM,EAAE,EAAE,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;IAExC,oEAAoE;IACpE,wCAAwC;IACxC,+CAA+C;IAC/C,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,sBAAsB,EAAE,OAAO,EAAE,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC;IAEnF,MAAM,WAAW,GAAG,oCAAoC,OAAO,SAAS,UAAU,EAAE,CAAC;IAErF,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IACnC,MAAM,GAAG,GAAG,SAAS,YAAY,EAAE,CAAC;IAEpC,IAAI,cAAc,GAAkB,IAAI,CAAC;IACzC,IAAI,gBAAgB,GAAkB,IAAI,CAAC;IAC3C,IAAI,OAAO,GAAkB,GAAG,aAAH,GAAG,cAAH,GAAG,GAAI,IAAI,CAAC;IACzC,IAAI,IAAI,IAAI,eAAe,EAAE;QAC3B,OAAO,GAAG,OAAO,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;QAChC,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,yBAAyB,CAAC,CAAC;QACrC,MAAM,yBAAe,CAAC,YAAY,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAClD,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,8BAA8B,CAAC,CAAC;QAC1C,0EAA0E;QAC1E,cAAc,GAAG,6CAA6C,OAAO,YAAY,GAAG,2BAA2B,CAAC;QAChH,gBAAgB,GAAG,6CAA6C,OAAO,YAAY,GAAG,4BAA4B,CAAC;KACpH;IAED,4BAA4B;IAC5B,MAAM,MAAM,GAAG;QACb,OAAO;QACP,UAAU;QACV,WAAW;QACX,SAAS,EAAE,KAAK,CAAC,IAAI;QACrB,YAAY,EAAE,KAAK,CAAC,QAAQ,EAAE;QAE9B,OAAO;QACP,gBAAgB;QAChB,cAAc;KACY,CAAC;IAE7B,OAAO,MAAM,CAAC;AAChB,CAAC,CAAA,CAAC;AAEF,MAAM,0BAA0B,GAAG,CAA8B,GAAQ,EAAE,EAAE;;IAC3E,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,aAAK,CAAC,MAAM,EAAE,CAAC;IAC/C,MAAM,WAAW,GACf,OAAO,IAAI,IAAI,IAAI,UAAU,IAAI,IAAI;QACnC,CAAC,CAAC,oCAAoC,OAAO,SAAS,UAAU,EAAE;QAClE,CAAC,CAAC,IAAI,CAAC;IACX,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE3C,MAAM,QAAQ,GAAG;QACf,OAAO;QACP,UAAU;QACV,WAAW;QACX,SAAS,EAAE,GAAG,CAAC,EAAE;QACjB,SAAS,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,EAAE,mCAAI,IAAI;QAEjC,WAAW,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,GAAG,mCAAI,IAAI;QACpC,SAAS,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,SAAS,mCAAI,IAAI;QAExC,WAAW,EAAE,GAAG,CAAC,OAAO,CAAC,SAAS,IAAI,SAAS;QAC/C,eAAe,EAAE,GAAG,CAAC,OAAO,CAAC,UAAU;KACX,CAAC;IAE
/B,OAAO,QAAQ,CAAC;AAClB,CAAC,CAAC;AAEF;;;;GAIG;AACU,QAAA,OAAO,GAAG;IACrB,WAAW,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QAC7B,MAAM,OAAO,GAAG,MAAM,aAAK,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,GAAS,EAAE,8DAAC,OAAA,MAAA,MAAA,CAAC,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC,0CAAE,SAAS,mCAAI,IAAI,CAAA,GAAA,CAAC;QAC9E,MAAM,QAAQ,GAAkC,CAAO,OAAO,EAAE,EAAE;YAChE,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,OAAO,iCAC/B,OAAO,KACV,SAAS,EAAE,IAAI,IACf,CAAC;YACH,OAAO,MAAM,CAAC,KAAK,CAAC;QACtB,CAAC,CAAA,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;YACxC,QAAQ;YACR,YAAY;SACb,CAAC;IACJ,CAAC,CAAA;IACD,gBAAgB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,gBAAgB,CAAC,GAAG,IAAI,CAAC;IAC9D,iBAAiB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,iBAAiB,CAAC,GAAG,IAAI,CAAC;IAChE,MAAM,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;IAC1C,QAAQ,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC;IAC9C,YAAY,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QAC9B,MAAM,aAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;IAC5B,CAAC,CAAA;IACD,wBAAwB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC;IAC/D,+BAA+B,EAAE,CAAO,KAAU,EAAE,EAAE;QACpD,OAAO,OAAO,CAAC,GAAG,CAAC,gBAAgB;YACjC,CAAC,CAAC,MAAM,aAAK,CAAC,wBAAwB,CAAC,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,KAAK,CAAC;YACpD,CAAC,CAAC,SAAS,CAAC;IAChB,CAAC,CAAA;IACD,kBAAkB,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB;IACxD,mBAAmB,EAAE,wBAAwB;IAC7C,qBAAqB,EAAE,0BAA0B;CACtB,CAAC","sourcesContent":["import { Actor, ApifyEnv } from 'apify';\nimport { CrawlingContext, playwrightUtils } from 'crawlee';\n\nimport type { CrawleeOneDataset, CrawleeOneIO } from './types';\n\nexport interface ApifyErrorReport {\n actorId: string | null;\n actorRunId: string | null;\n actorRunUrl: string;\n errorName: string;\n errorMessage: string;\n pageUrl: string | null;\n pageHtmlSnapshot: string | null;\n pageScreenshot: string | null;\n}\n\nexport interface ApifyEntryMetadata {\n actorId: string | null;\n actorRunId: string | null;\n actorRunUrl: 
string | null;\n contextId: string;\n requestId: string | null;\n\n /** The URL given to the crawler */\n originalUrl: string | null;\n /** The URL given to the crawler after possible redirects */\n loadedUrl: string | null;\n\n /** ISO datetime string that indicates the time when the request has been processed. */\n dateHandled: string;\n numberOfRetries: number;\n}\n\n/**\n * Integration between CrawleeOne and Apify.\n *\n * This is the default integration.\n */\nexport type ApifyCrawleeOneIO = CrawleeOneIO<ApifyEnv, ApifyErrorReport, ApifyEntryMetadata>;\n\nconst generateApifyErrorReport: ApifyCrawleeOneIO['generateErrorReport'] = async (\n input,\n options\n) => {\n const { error, page, url, log } = input;\n const { io, allowScreenshot } = options;\n\n // storeId is ID of current key-value store, where we save snapshots\n // We can also capture actor and run IDs\n // to have easy access in the reporting dataset\n const { actorId, actorRunId, defaultKeyValueStoreId: storeId } = await io.getEnv();\n\n const actorRunUrl = `https://console.apify.com/actors/${actorId}/runs/${actorRunId}`;\n\n const randomNumber = Math.random();\n const key = `ERROR-${randomNumber}`;\n\n let pageScreenshot: string | null = null;\n let pageHtmlSnapshot: string | null = null;\n let pageUrl: string | null = url ?? 
null;\n if (page && allowScreenshot) {\n pageUrl = pageUrl || page.url();\n log?.info('Capturing page snapshot');\n await playwrightUtils.saveSnapshot(page, { key });\n log?.info('DONE capturing page snapshot');\n // You will have to adjust the keys if you save them in a non-standard way\n pageScreenshot = `https://api.apify.com/v2/key-value-stores/${storeId}/records/${key}.jpg?disableRedirect=true`;\n pageHtmlSnapshot = `https://api.apify.com/v2/key-value-stores/${storeId}/records/${key}.html?disableRedirect=true`;\n }\n\n // We create a report object\n const report = {\n actorId,\n actorRunId,\n actorRunUrl,\n errorName: error.name,\n errorMessage: error.toString(),\n\n pageUrl,\n pageHtmlSnapshot,\n pageScreenshot,\n } satisfies ApifyErrorReport;\n\n return report;\n};\n\nconst generateApifyEntryMetadata = <Ctx extends CrawlingContext>(ctx: Ctx) => {\n const { actorId, actorRunId } = Actor.getEnv();\n const actorRunUrl =\n actorId != null && actorRunId != null\n ? `https://console.apify.com/actors/${actorId}/runs/${actorRunId}`\n : null;\n const handledAt = new Date().toISOString();\n\n const metadata = {\n actorId,\n actorRunId,\n actorRunUrl,\n contextId: ctx.id,\n requestId: ctx.request.id ?? null,\n\n originalUrl: ctx.request.url ?? null,\n loadedUrl: ctx.request.loadedUrl ?? null,\n\n dateHandled: ctx.request.handledAt || handledAt,\n numberOfRetries: ctx.request.retryCount,\n } satisfies ApifyEntryMetadata;\n\n return metadata;\n};\n\n/**\n * Integration between CrawleeOne and Apify.\n *\n * This is the default integration.\n */\nexport const apifyIO = {\n openDataset: async (...args) => {\n const dataset = await Actor.openDataset(...args);\n const getItemCount = async () => (await dataset.getInfo())?.itemCount ?? 
null;\n const getItems: CrawleeOneDataset['getItems'] = async (options) => {\n const result = await dataset.getData({\n ...options,\n skipEmpty: true,\n });\n return result.items;\n };\n\n return {\n pushData: dataset.pushData.bind(dataset),\n getItems,\n getItemCount,\n };\n },\n openRequestQueue: (...args) => Actor.openRequestQueue(...args),\n openKeyValueStore: (...args) => Actor.openKeyValueStore(...args),\n getEnv: (...args) => Actor.getEnv(...args),\n getInput: (...args) => Actor.getInput(...args),\n runInContext: async (...args) => {\n await Actor.main(...args);\n },\n triggerDownstreamCrawler: (...args) => Actor.metamorph(...args),\n createDefaultProxyConfiguration: async (input: any) => {\n return process.env.APIFY_IS_AT_HOME\n ? await Actor.createProxyConfiguration(input?.proxy)\n : undefined;\n },\n isTelemetryEnabled: () => !!process.env.APIFY_IS_AT_HOME,\n generateErrorReport: generateApifyErrorReport,\n generateEntryMetadata: generateApifyEntryMetadata,\n} satisfies ApifyCrawleeOneIO;\n"]}
|