@crawlee/http 3.9.3-beta.49 → 3.9.3-beta.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +1 -0
- package/index.d.ts.map +1 -1
- package/index.js +1 -0
- package/index.js.map +1 -1
- package/index.mjs +2 -0
- package/internals/file-download.d.ts +101 -0
- package/internals/file-download.d.ts.map +1 -0
- package/internals/file-download.js +161 -0
- package/internals/file-download.js.map +1 -0
- package/package.json +5 -5
- package/tsconfig.build.tsbuildinfo +1 -1
package/index.d.ts
CHANGED
package/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,gBAAgB,CAAC;AAC/B,cAAc,0BAA0B,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,gBAAgB,CAAC;AAC/B,cAAc,0BAA0B,CAAC;AACzC,cAAc,2BAA2B,CAAC"}
|
package/index.js
CHANGED
|
@@ -3,4 +3,5 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
const tslib_1 = require("tslib");
|
|
4
4
|
tslib_1.__exportStar(require("@crawlee/basic"), exports);
|
|
5
5
|
tslib_1.__exportStar(require("./internals/http-crawler"), exports);
|
|
6
|
+
tslib_1.__exportStar(require("./internals/file-download"), exports);
|
|
6
7
|
//# sourceMappingURL=index.js.map
|
package/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,yDAA+B;AAC/B,mEAAyC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,yDAA+B;AAC/B,mEAAyC;AACzC,oEAA0C"}
|
package/index.mjs
CHANGED
|
@@ -18,6 +18,7 @@ export const ErrorSnapshotter = mod.ErrorSnapshotter;
|
|
|
18
18
|
export const ErrorTracker = mod.ErrorTracker;
|
|
19
19
|
export const EventManager = mod.EventManager;
|
|
20
20
|
export const EventType = mod.EventType;
|
|
21
|
+
export const FileDownload = mod.FileDownload;
|
|
21
22
|
export const HttpCrawler = mod.HttpCrawler;
|
|
22
23
|
export const KeyValueStore = mod.KeyValueStore;
|
|
23
24
|
export const LocalEventManager = mod.LocalEventManager;
|
|
@@ -65,6 +66,7 @@ export const constructRegExpObjectsFromRegExps = mod.constructRegExpObjectsFromR
|
|
|
65
66
|
export const cookieStringToToughCookie = mod.cookieStringToToughCookie;
|
|
66
67
|
export const createBasicRouter = mod.createBasicRouter;
|
|
67
68
|
export const createDeserialize = mod.createDeserialize;
|
|
69
|
+
export const createFileRouter = mod.createFileRouter;
|
|
68
70
|
export const createHttpRouter = mod.createHttpRouter;
|
|
69
71
|
export const createRequestOptions = mod.createRequestOptions;
|
|
70
72
|
export const createRequests = mod.createRequests;
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import type { Dictionary } from '@crawlee/types';
|
|
2
|
+
import type { ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, RequestHandler, RouterRoutes } from '../index';
|
|
3
|
+
import { HttpCrawler } from '../index';
|
|
4
|
+
export type FileDownloadErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
|
|
5
|
+
JSONData extends Dictionary = any> = ErrorHandler<FileDownloadCrawlingContext<UserData, JSONData>>;
|
|
6
|
+
export type StreamHandlerContext = Omit<FileDownloadCrawlingContext, 'body' | 'response' | 'parseWithCheerio' | 'json' | 'addRequests' | 'contentType'> & {
|
|
7
|
+
stream: ReadableStream;
|
|
8
|
+
};
|
|
9
|
+
type StreamHandler = (context: StreamHandlerContext) => void | Promise<void>;
|
|
10
|
+
export type FileDownloadOptions<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
|
|
11
|
+
JSONData extends Dictionary = any> = (Omit<HttpCrawlerOptions<FileDownloadCrawlingContext<UserData, JSONData>>, 'requestHandler'> & {
|
|
12
|
+
requestHandler?: never;
|
|
13
|
+
streamHandler?: StreamHandler;
|
|
14
|
+
}) | (Omit<HttpCrawlerOptions<FileDownloadCrawlingContext<UserData, JSONData>>, 'requestHandler'> & {
|
|
15
|
+
requestHandler: FileDownloadRequestHandler;
|
|
16
|
+
streamHandler?: never;
|
|
17
|
+
});
|
|
18
|
+
export type FileDownloadHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
|
|
19
|
+
JSONData extends Dictionary = any> = InternalHttpHook<FileDownloadCrawlingContext<UserData, JSONData>>;
|
|
20
|
+
export interface FileDownloadCrawlingContext<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
|
|
21
|
+
JSONData extends Dictionary = any> extends InternalHttpCrawlingContext<UserData, JSONData, FileDownload> {
|
|
22
|
+
}
|
|
23
|
+
export type FileDownloadRequestHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
|
|
24
|
+
JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<UserData, JSONData>>;
|
|
25
|
+
/**
|
|
26
|
+
* Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
|
|
27
|
+
*
|
|
28
|
+
* Since `FileDownload` uses raw HTTP requests to download the files, it is very fast and bandwidth-efficient.
|
|
29
|
+
* However, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,
|
|
30
|
+
* you might need to use {@apilink CheerioCrawler}, {@apilink PuppeteerCrawler} or {@apilink PlaywrightCrawler} instead.
|
|
31
|
+
*
|
|
32
|
+
* `FileDownload` downloads each URL using a plain HTTP request and then invokes the user-provided {@apilink FileDownloadOptions.requestHandler} where the user can specify what to do with the downloaded data.
|
|
33
|
+
*
|
|
34
|
+
* The source URLs are represented using {@apilink Request} objects that are fed from {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink FileDownloadOptions.requestList} or {@apilink FileDownloadOptions.requestQueue} constructor options, respectively.
|
|
35
|
+
*
|
|
36
|
+
* If both {@apilink FileDownloadOptions.requestList} and {@apilink FileDownloadOptions.requestQueue} are used, the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
|
|
37
|
+
*
|
|
38
|
+
* The crawler finishes when there are no more {@apilink Request} objects to crawl.
|
|
39
|
+
*
|
|
40
|
+
* We can use the `preNavigationHooks` to adjust `gotOptions`:
|
|
41
|
+
*
|
|
42
|
+
* ```
|
|
43
|
+
* preNavigationHooks: [
|
|
44
|
+
* (crawlingContext, gotOptions) => {
|
|
45
|
+
* // ...
|
|
46
|
+
* },
|
|
47
|
+
* ]
|
|
48
|
+
* ```
|
|
49
|
+
*
|
|
50
|
+
* New requests are only dispatched when there is enough free CPU and memory available, using the functionality provided by the {@apilink AutoscaledPool} class. All {@apilink AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions` parameter of the `FileDownload` constructor. For user convenience, the `minConcurrency` and `maxConcurrency` {@apilink AutoscaledPool} options are available directly in the `FileDownload` constructor.
|
|
51
|
+
*
|
|
52
|
+
* ## Example usage
|
|
53
|
+
*
|
|
54
|
+
* ```ts
|
|
55
|
+
* const crawler = new FileDownload({
|
|
56
|
+
* requestHandler({ body, request }) {
|
|
57
|
+
* writeFileSync(request.url.replace(/[^a-z0-9\.]/gi, '_'), body);
|
|
58
|
+
* },
|
|
59
|
+
* });
|
|
60
|
+
*
|
|
61
|
+
* await crawler.run([
|
|
62
|
+
* 'http://www.example.com/document.pdf',
|
|
63
|
+
* 'http://www.example.com/sound.mp3',
|
|
64
|
+
* 'http://www.example.com/video.mkv',
|
|
65
|
+
* ]);
|
|
66
|
+
* ```
|
|
67
|
+
*/
|
|
68
|
+
export declare class FileDownload extends HttpCrawler<FileDownloadCrawlingContext> {
|
|
69
|
+
private streamHandler?;
|
|
70
|
+
constructor(options?: FileDownloadOptions);
|
|
71
|
+
protected _runRequestHandler(context: FileDownloadCrawlingContext): Promise<void>;
|
|
72
|
+
private streamRequestHandler;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Creates new {@apilink Router} instance that works based on request labels.
|
|
76
|
+
* This instance can then serve as a `requestHandler` of your {@apilink FileDownload}.
|
|
77
|
+
* Defaults to the {@apilink FileDownloadCrawlingContext}.
|
|
78
|
+
*
|
|
79
|
+
* > Serves as a shortcut for using `Router.create<FileDownloadCrawlingContext>()`.
|
|
80
|
+
*
|
|
81
|
+
* ```ts
|
|
82
|
+
* import { FileDownload, createFileRouter } from 'crawlee';
|
|
83
|
+
*
|
|
84
|
+
* const router = createFileRouter();
|
|
85
|
+
* router.addHandler('label-a', async (ctx) => {
|
|
86
|
+
* ctx.log.info('...');
|
|
87
|
+
* });
|
|
88
|
+
* router.addDefaultHandler(async (ctx) => {
|
|
89
|
+
* ctx.log.info('...');
|
|
90
|
+
* });
|
|
91
|
+
*
|
|
92
|
+
* const crawler = new FileDownload({
|
|
93
|
+
* requestHandler: router,
|
|
94
|
+
* });
|
|
95
|
+
* await crawler.run();
|
|
96
|
+
* ```
|
|
97
|
+
*/
|
|
98
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
99
|
+
export declare function createFileRouter<Context extends FileDownloadCrawlingContext = FileDownloadCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("packages/core/dist/router").RouterHandler<Context>;
|
|
100
|
+
export {};
|
|
101
|
+
//# sourceMappingURL=file-download.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,OAAO,KAAK,EACR,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,YAAY,EACf,MAAM,UAAU,CAAC;AAClB,OAAO,EACH,WAAW,EAEd,MAAM,UAAU,CAAC;AAElB,MAAM,MAAM,wBAAwB,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAElE,MAAM,MAAM,oBAAoB,GAAG,IAAI,CAAC,2BAA2B,EAAE,MAAM,GAAG,UAAU,GAAG,kBAAkB,GAAG,MAAM,GAAG,aAAa,GAAG,aAAa,CAAC,GAAG;IACtJ,MAAM,EAAE,cAAc,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG,CAAC,OAAO,EAAE,oBAAoB,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAE7E,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IAGjC,CAAC,IAAI,CAAC,kBAAkB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAAE,gBAAgB,CAAE,GAAG;IAAE,cAAc,CAAC,EAAE,KAAK,CAAC;IAAC,aAAa,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAE1J,CAAC,IAAI,CAAC,kBAAkB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAAE,gBAAgB,CAAE,GAAG;IAAE,cAAc,EAAE,0BAA0B,CAAC;IAAC,aAAa,CAAC,EAAE,KAAK,CAAA;CAAE,CAAC,CAAC;AAE3K,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEtE,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,YAAY,CAAC;CAAG;AAE1E,MAAM,MAAM,0BAA0B,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEpE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,qBAAa,YAAa,SAAQ,WAAW,CAAC,2BAA2B,CAAC;IACtE,OAAO,CAAC,aAAa,CAAC,CAAgB;gBAE1B,OAAO,GAAE,mBAAwB;cAqBpB,kBAAkB,CAAC,OAAO,EAAE,2BAA2B;YAQlE,oBAAoB;CAgErC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAA
Y,CAAC,OAAO,EAAE,QAAQ,CAAC,8DAEzC"}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createFileRouter = exports.FileDownload = void 0;
|
|
4
|
+
const promises_1 = require("stream/promises");
|
|
5
|
+
const types_1 = require("util/types");
|
|
6
|
+
const index_1 = require("../index");
|
|
7
|
+
/**
|
|
8
|
+
* Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
|
|
9
|
+
*
|
|
10
|
+
* Since `FileDownload` uses raw HTTP requests to download the files, it is very fast and bandwidth-efficient.
|
|
11
|
+
* However, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,
|
|
12
|
+
* you might need to use {@apilink CheerioCrawler}, {@apilink PuppeteerCrawler} or {@apilink PlaywrightCrawler} instead.
|
|
13
|
+
*
|
|
14
|
+
* `FileDownload` downloads each URL using a plain HTTP request and then invokes the user-provided {@apilink FileDownloadOptions.requestHandler} where the user can specify what to do with the downloaded data.
|
|
15
|
+
*
|
|
16
|
+
* The source URLs are represented using {@apilink Request} objects that are fed from {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink FileDownloadOptions.requestList} or {@apilink FileDownloadOptions.requestQueue} constructor options, respectively.
|
|
17
|
+
*
|
|
18
|
+
* If both {@apilink FileDownloadOptions.requestList} and {@apilink FileDownloadOptions.requestQueue} are used, the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
|
|
19
|
+
*
|
|
20
|
+
* The crawler finishes when there are no more {@apilink Request} objects to crawl.
|
|
21
|
+
*
|
|
22
|
+
* We can use the `preNavigationHooks` to adjust `gotOptions`:
|
|
23
|
+
*
|
|
24
|
+
* ```
|
|
25
|
+
* preNavigationHooks: [
|
|
26
|
+
* (crawlingContext, gotOptions) => {
|
|
27
|
+
* // ...
|
|
28
|
+
* },
|
|
29
|
+
* ]
|
|
30
|
+
* ```
|
|
31
|
+
*
|
|
32
|
+
* New requests are only dispatched when there is enough free CPU and memory available, using the functionality provided by the {@apilink AutoscaledPool} class. All {@apilink AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions` parameter of the `FileDownload` constructor. For user convenience, the `minConcurrency` and `maxConcurrency` {@apilink AutoscaledPool} options are available directly in the `FileDownload` constructor.
|
|
33
|
+
*
|
|
34
|
+
* ## Example usage
|
|
35
|
+
*
|
|
36
|
+
* ```ts
|
|
37
|
+
* const crawler = new FileDownload({
|
|
38
|
+
* requestHandler({ body, request }) {
|
|
39
|
+
* writeFileSync(request.url.replace(/[^a-z0-9\.]/gi, '_'), body);
|
|
40
|
+
* },
|
|
41
|
+
* });
|
|
42
|
+
*
|
|
43
|
+
* await crawler.run([
|
|
44
|
+
* 'http://www.example.com/document.pdf',
|
|
45
|
+
* 'http://www.example.com/sound.mp3',
|
|
46
|
+
* 'http://www.example.com/video.mkv',
|
|
47
|
+
* ]);
|
|
48
|
+
* ```
|
|
49
|
+
*/
|
|
50
|
+
class FileDownload extends index_1.HttpCrawler {
|
|
51
|
+
constructor(options = {}) {
|
|
52
|
+
const { streamHandler } = options;
|
|
53
|
+
delete options.streamHandler;
|
|
54
|
+
if (streamHandler) {
|
|
55
|
+
// For streams, the navigation is done in the request handler.
|
|
56
|
+
options.requestHandlerTimeoutSecs = options.navigationTimeoutSecs ?? 120;
|
|
57
|
+
}
|
|
58
|
+
super(options);
|
|
59
|
+
Object.defineProperty(this, "streamHandler", {
|
|
60
|
+
enumerable: true,
|
|
61
|
+
configurable: true,
|
|
62
|
+
writable: true,
|
|
63
|
+
value: void 0
|
|
64
|
+
});
|
|
65
|
+
this.streamHandler = streamHandler;
|
|
66
|
+
if (this.streamHandler) {
|
|
67
|
+
this.requestHandler = this.streamRequestHandler;
|
|
68
|
+
}
|
|
69
|
+
// The base HttpCrawler class only supports a handful of text based mime types.
|
|
70
|
+
// With the FileDownload crawler, we want to download any file type.
|
|
71
|
+
this.supportedMimeTypes = new Set(['*/*']);
|
|
72
|
+
}
|
|
73
|
+
async _runRequestHandler(context) {
|
|
74
|
+
if (this.streamHandler) {
|
|
75
|
+
context.request.skipNavigation = true;
|
|
76
|
+
}
|
|
77
|
+
await super._runRequestHandler(context);
|
|
78
|
+
}
|
|
79
|
+
async streamRequestHandler(context) {
|
|
80
|
+
const { log, request: { url } } = context;
|
|
81
|
+
const { gotScraping } = await import('got-scraping');
|
|
82
|
+
const stream = gotScraping.stream({
|
|
83
|
+
url,
|
|
84
|
+
timeout: { request: undefined },
|
|
85
|
+
proxyUrl: context.proxyInfo?.url,
|
|
86
|
+
isStream: true,
|
|
87
|
+
});
|
|
88
|
+
let pollingInterval;
|
|
89
|
+
const cleanUp = () => {
|
|
90
|
+
clearInterval(pollingInterval);
|
|
91
|
+
stream.destroy();
|
|
92
|
+
};
|
|
93
|
+
const downloadPromise = new Promise((resolve, reject) => {
|
|
94
|
+
pollingInterval = setInterval(() => {
|
|
95
|
+
const { total, transferred } = stream.downloadProgress;
|
|
96
|
+
if (transferred > 0) {
|
|
97
|
+
log.debug(`Downloaded ${transferred} bytes of ${total ?? 0} bytes from ${url}.`);
|
|
98
|
+
}
|
|
99
|
+
}, 5000);
|
|
100
|
+
stream.on('error', async (error) => {
|
|
101
|
+
cleanUp();
|
|
102
|
+
reject(error);
|
|
103
|
+
});
|
|
104
|
+
let streamHandlerResult;
|
|
105
|
+
try {
|
|
106
|
+
context.stream = stream;
|
|
107
|
+
streamHandlerResult = this.streamHandler(context);
|
|
108
|
+
}
|
|
109
|
+
catch (e) {
|
|
110
|
+
cleanUp();
|
|
111
|
+
reject(e);
|
|
112
|
+
}
|
|
113
|
+
if ((0, types_1.isPromise)(streamHandlerResult)) {
|
|
114
|
+
streamHandlerResult.then(() => {
|
|
115
|
+
resolve();
|
|
116
|
+
}).catch((e) => {
|
|
117
|
+
cleanUp();
|
|
118
|
+
reject(e);
|
|
119
|
+
});
|
|
120
|
+
}
|
|
121
|
+
else {
|
|
122
|
+
resolve();
|
|
123
|
+
}
|
|
124
|
+
});
|
|
125
|
+
await Promise.all([
|
|
126
|
+
downloadPromise,
|
|
127
|
+
(0, promises_1.finished)(stream),
|
|
128
|
+
]);
|
|
129
|
+
cleanUp();
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
exports.FileDownload = FileDownload;
|
|
133
|
+
/**
|
|
134
|
+
* Creates new {@apilink Router} instance that works based on request labels.
|
|
135
|
+
* This instance can then serve as a `requestHandler` of your {@apilink FileDownload}.
|
|
136
|
+
* Defaults to the {@apilink FileDownloadCrawlingContext}.
|
|
137
|
+
*
|
|
138
|
+
* > Serves as a shortcut for using `Router.create<FileDownloadCrawlingContext>()`.
|
|
139
|
+
*
|
|
140
|
+
* ```ts
|
|
141
|
+
* import { FileDownload, createFileRouter } from 'crawlee';
|
|
142
|
+
*
|
|
143
|
+
* const router = createFileRouter();
|
|
144
|
+
* router.addHandler('label-a', async (ctx) => {
|
|
145
|
+
* ctx.log.info('...');
|
|
146
|
+
* });
|
|
147
|
+
* router.addDefaultHandler(async (ctx) => {
|
|
148
|
+
* ctx.log.info('...');
|
|
149
|
+
* });
|
|
150
|
+
*
|
|
151
|
+
* const crawler = new FileDownload({
|
|
152
|
+
* requestHandler: router,
|
|
153
|
+
* });
|
|
154
|
+
* await crawler.run();
|
|
155
|
+
* ```
|
|
156
|
+
*/
|
|
157
|
+
function createFileRouter(routes) {
|
|
158
|
+
return index_1.Router.create(routes);
|
|
159
|
+
}
|
|
160
|
+
exports.createFileRouter = createFileRouter;
|
|
161
|
+
//# sourceMappingURL=file-download.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":";;;AAAA,8CAA2C;AAC3C,sCAAuC;AAavC,oCAGkB;AAqClB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAa,YAAa,SAAQ,mBAAwC;IAGtE,YAAY,UAA+B,EAAE;QACzC,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC;QAClC,OAAO,OAAO,CAAC,aAAa,CAAC;QAE7B,IAAI,aAAa,EAAE,CAAC;YAChB,8DAA8D;YAC7D,OAAe,CAAC,yBAAyB,GAAG,OAAO,CAAC,qBAAqB,IAAI,GAAG,CAAC;QACtF,CAAC;QAED,KAAK,CAAC,OAAO,CAAC,CAAC;QAXX;;;;;WAA8B;QAalC,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,oBAAoB,CAAC;QACpD,CAAC;QAED,+EAA+E;QAC/E,oEAAoE;QACnE,IAAY,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IACxD,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAAoC;QAC5E,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,OAAO,CAAC,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;QAC1C,CAAC;QAED,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;IAEO,KAAK,CAAC,oBAAoB,CAAC,OAAoC;QACnE,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE,GAAG,EAAE,EAAE,GAAG,OAAO,CAAC;QAE1C,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;QAErD,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC;YAC9B,GAAG;YACH,OAAO,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/B,QAAQ,EAAE,OAAO,CAAC,SAAS,EAAE,GAAG;YAChC,QAAQ,EAAE,IAAI;SACjB,CAAC,CAAC;QAEH,IAAI,eAA2C,CAAC;QAEhD,MAAM,OAAO,GAAG,GAAG,EAAE;YACjB,aAAa,CAAC,eAAgB,CAAC,CAAC;YAChC,MAAM,CAAC,OAAO,EAAE,CAAC;QACrB,CAAC,CAAC;QAEF,MAAM,eAAe,GAAG,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1D,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;gBAC/B,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,MAAM,CAAC,gBAAgB,CAAC;gBAEvD,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;oBAClB,GAAG,CAAC,KAAK,CACL,cAAc,WAAW,aAAa,KAAK,IAAI,CAAC,eAAe,GAAG,GAAG,CACxE,CAAC;gBACN,CAAC;YACL,CAAC,EAAE,IAAI,CAAC,CAAC;YAET,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,KAAY,EAAE,EAAE;gBACtC,OAAO,EAAE,CAAC;gBACV,MAAM,CAAC,KAAK,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,IAAI,mBAAmB,CAAC;YAExB,IAAI,CAAC;gBACD,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC;gBACxB,mBAAmB,GAAG,IAAI,CAAC,aAAc,CAAC,OAAc,CAAC,CAAC;YAC9D,CAAC;YAAC,OAAO,CAAC,
EAAE,CAAC;gBACT,OAAO,EAAE,CAAC;gBACV,MAAM,CAAC,CAAC,CAAC,CAAC;YACd,CAAC;YAED,IAAI,IAAA,iBAAS,EAAC,mBAAmB,CAAC,EAAE,CAAC;gBACjC,mBAAmB,CAAC,IAAI,CAAC,GAAG,EAAE;oBAC1B,OAAO,EAAE,CAAC;gBACd,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAQ,EAAE,EAAE;oBAClB,OAAO,EAAE,CAAC;oBACV,MAAM,CAAC,CAAC,CAAC,CAAC;gBACd,CAAC,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,OAAO,EAAE,CAAC;YACd,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,GAAG,CAAC;YACd,eAAe;YACf,IAAA,mBAAQ,EAAC,MAAM,CAAC;SACnB,CAAC,CAAC;QAEH,OAAO,EAAE,CAAC;IACd,CAAC;CACJ;AAhGD,oCAgGC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,SAAgB,gBAAgB,CAG9B,MAAwC;IACtC,OAAO,cAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC;AALD,4CAKC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/http",
|
|
3
|
-
"version": "3.9.3-beta.49",
|
|
3
|
+
"version": "3.9.3-beta.50",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=16.0.0"
|
|
@@ -55,9 +55,9 @@
|
|
|
55
55
|
"dependencies": {
|
|
56
56
|
"@apify/timeout": "^0.3.0",
|
|
57
57
|
"@apify/utilities": "^2.7.10",
|
|
58
|
-
"@crawlee/basic": "3.9.3-beta.49",
|
|
59
|
-
"@crawlee/types": "3.9.3-beta.49",
|
|
60
|
-
"@crawlee/utils": "3.9.3-beta.49",
|
|
58
|
+
"@crawlee/basic": "3.9.3-beta.50",
|
|
59
|
+
"@crawlee/types": "3.9.3-beta.50",
|
|
60
|
+
"@crawlee/utils": "3.9.3-beta.50",
|
|
61
61
|
"@types/content-type": "^1.1.5",
|
|
62
62
|
"cheerio": "^1.0.0-rc.12",
|
|
63
63
|
"content-type": "^1.0.4",
|
|
@@ -75,5 +75,5 @@
|
|
|
75
75
|
}
|
|
76
76
|
}
|
|
77
77
|
},
|
|
78
|
-
"gitHead": "
|
|
78
|
+
"gitHead": "b4d36bdb7cb3de644b5c286ff634b110cf9ab580"
|
|
79
79
|
}
|